bookloupe-testing: bookloupe/bookloupe.c@d48f66b0ad0d

     1 /*************************************************************************/

     2 /* bookloupe--check for assorted weirdnesses in a PG candidate text file */

     3 /*                                                                       */

     4 /* Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>                  */

     5 /* Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk>                     */

     6 /*                                                                       */

     7 /* This program is free software; you can redistribute it and/or modify  */

     8 /* it under the terms of the GNU General Public License as published by  */

     9 /* the Free Software Foundation; either version 2 of the License, or     */

    10 /* (at your option) any later version.                                   */

    11 /*                                                                       */

    12 /* This program is distributed in the hope that it will be useful,       */

    13 /* but WITHOUT ANY WARRANTY; without even the implied warranty of        */

    14 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          */

    15 /* GNU General Public License for more details.                          */

    16 /*                                                                       */

    17 /* You should have received a copy of the GNU General Public License     */

    18 /* along with this program. If not, see <http://www.gnu.org/licenses/>.  */

    19 /*************************************************************************/

    21 #include <stdio.h>

    22 #include <stdlib.h>

    23 #include <string.h>

    24 #include <ctype.h>

    26 #define MAXWORDLEN    80    /* max length of one word             */

    27 #define LINEBUFSIZE 2048    /* buffer size for an input line      */

    29 #define MAX_USER_TYPOS 1000

    30 #define USERTYPO_FILE "gutcheck.typ"

    32 #ifndef MAX_PATH

    33 #define MAX_PATH 16384

    34 #endif

    36 char aline[LINEBUFSIZE];

    37 char prevline[LINEBUFSIZE];

    39 /* Common typos. */

    40 char *typo[] = {

    41     "teh", "th", "og", "fi", "ro", "adn", "yuo", "ot", "fo", "thet", "ane",

    42     "nad", "te", "ig", "acn",  "ahve", "alot", "anbd", "andt", "awya", "aywa",

    43     "bakc", "om", "btu", "byt", "cna", "cxan", "coudl", "dont", "didnt",

    44     "couldnt", "wouldnt", "doesnt", "shouldnt", "doign", "ehr", "hmi", "hse",

    45     "esle", "eyt", "fitrs", "firts", "foudn", "frmo", "fromt", "fwe", "gaurd",

    46     "gerat", "goign", "gruop", "haev", "hda", "hearign", "seeign", "sayign",

    47     "herat", "hge", "hsa", "hsi", "hte", "htere", "htese", "htey", "htis",

    48     "hvae", "hwich", "idae", "ihs", "iits", "int", "iwll", "iwth", "jsut",

    49     "loev", "sefl", "myu", "nkow", "nver", "nwe", "nwo", "ocur", "ohter",

    50     "omre", "onyl", "otehr", "otu", "owrk", "owuld", "peice", "peices",

    51     "peolpe", "peopel", "perhasp", "perhpas", "pleasent", "poeple", "porblem",

    52     "porblems", "rwite", "saidt", "saidh", "saids", "seh", "smae", "smoe",

    53     "sohw", "stnad", "stopry", "stoyr", "stpo", "tahn", "taht", "tath",

    54     "tehy", "tghe", "tghis", "theri", "theyll", "thgat", "thge", "thier",

    55     "thna", "thne", "thnig", "thnigs", "thsi", "thsoe", "thta", "timne",

    56     "tirne", "tkae", "tthe", "tyhat", "tyhe", "veyr", "vou", "vour", "vrey",

    57     "waht", "wasnt", "awtn", "watn", "wehn", "whic", "whcih", "whihc", "whta",

    58     "wihch", "wief", "wiht", "witha", "wiull", "wnat", "wnated", "wnats",

    59     "woh", "wohle", "wokr", "woudl", "wriet", "wrod", "wroet", "wroking",

    60     "wtih", "wuould", "wya", "yera", "yeras", "yersa", "yoiu", "youve",

    61     "ytou", "yuor", "abead", "ahle", "ahout", "ahove", "altbough", "balf",

    62     "bardly", "bas", "bave", "baving", "bebind", "beld", "belp", "belped",

    63     "ber", "bere", "bim", "bis", "bome", "bouse", "bowever", "buge",

    64     "dehates", "deht", "han", "hecause", "hecome", "heen", "hefore", "hegan",

    65     "hegin", "heing", "helieve", "henefit", "hetter", "hetween", "heyond",

    66     "hig", "higber", "huild", "huy", "hy", "jobn", "joh", "meanwbile",

    67     "memher", "memhers", "numher", "numhers", "perbaps", "prohlem", "puhlic",

    68     "witbout", "arn", "hin", "hirn", "wrok", "wroked", "amd", "aud",

    69     "prornise", "prornised", "modem", "bo", "heside", "chapteb", "chaptee",

    70     "se", ""

    71 };

    73 char *usertypo[MAX_USER_TYPOS];

    75 /* Common abbreviations and other OK words not to query as typos. */

    76 char *okword[] = {

    77     "mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "hmm", "h'm", "hmmm",

    78     "rd", "sh", "br", "pp", "hm", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd",

    79     "pompeii","hawaii","hawaiian", "hotbed", "heartbeat", "heartbeats",

    80     "outbid", "outbids", "frostbite", "frostbitten", ""

    81 };

    83 /* Common abbreviations that cause otherwise unexplained periods. */

    84 char *abbrev[] = {

    85     "cent", "cents", "viz", "vol", "vols", "vid", "ed", "al", "etc", "op",

    86     "cit", "deg", "min", "chap", "oz", "mme", "mlle", "mssrs", ""

    87 };

    89 /*

    90  * Two-Letter combinations that rarely if ever start words,

    91  * but are common scannos or otherwise common letter combinations.

    92  */

    93 char *nostart[] = {

    94     "hr", "hl", "cb", "sb", "tb", "wb", "tl", "tn", "rn", "lt", "tj", ""

    95 };

    97 /*

    98  * Two-Letter combinations that rarely if ever end words,

    99  * but are common scannos or otherwise common letter combinations.

   100  */

   101 char *noend[] = {

   102     "cb", "gb", "pb", "sb", "tb", "wh", "fr", "br", "qu", "tw", "gl", "fl",

   103     "sw", "gr", "sl", "cl", "iy", ""

   104 };

   106 char *markup[] = {

   107     "a", "b", "big", "blockquote", "body", "br", "center", "col", "div", "em",

   108     "font", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html", "i",

   109     "img", "li", "meta", "ol", "p", "pre", "small", "span", "strong", "sub",

   110     "sup", "table", "td", "tfoot", "thead", "title", "tr", "tt", "u", "ul", ""

   111 };

   113 char *DPmarkup[] = {

   114     "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/", "<tb>", ""

   115 };

   117 char *nocomma[] = {

   118     "the", "it's", "their", "an", "mrs", "a", "our", "that's", "its", "whose",

   119     "every", "i'll", "your", "my", "mr", "mrs", "mss", "mssrs", "ft", "pm",

   120     "st", "dr", "rd", "pp", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", "i'm",

   121     "during", "let", "toward", "among", ""

   122 };

   124 char *noperiod[] = {

   125     "every", "i'm", "during", "that's", "their", "your", "our", "my", "or",

   126     "and", "but", "as", "if", "the", "its", "it's", "until", "than", "whether",

   127     "i'll", "whose", "who", "because", "when", "let", "till", "very", "an",

   128     "among", "those", "into", "whom", "having", "thence", ""

   129 };

   131 char vowels[] = "aeiouàáâãäæèéêëìíîïòóôõöùúûü";

   133 struct {

   134     char *htmlent;

   135     char *htmlnum;

   136     char *textent;

   137 } entities[] = {

   138     "&amp;",	"&#38;",     "&",

   139     "&lt;",	"&#60;",     "<",

   140     "&gt;",	"&#62;",     ">",

   141     "&deg;",	"&#176;",    " degrees",

   142     "&pound;",	"&#163;",    "L",

   143     "&quot;",	"&#34;",     "\"", /* quotation mark = APL quote */

   144     "&OElig;",	"&#338;",    "OE", /* latin capital ligature OE */

   145     "&oelig;",	"&#339;",    "oe", /* latin small ligature oe */

   146     "&Scaron;",	"&#352;",    "S", /* latin capital letter S with caron */

   147     "&scaron;",	"&#353;",    "s", /* latin small letter s with caron */

   148     "&Yuml;",	"&#376;",    "Y", /* latin capital letter Y with diaeresis */

   149     "&circ;",	"&#710;",    "",  /* modifier letter circumflex accent */

   150     "&tilde;",	"&#732;",    "~", /* small tilde, U+02DC ISOdia */

   151     "&ensp;",	"&#8194;",   " ", /* en space, U+2002 ISOpub */

   152     "&emsp;",	"&#8195;",   " ", /* em space, U+2003 ISOpub */

   153     "&thinsp;",	"&#8201;",   " ", /* thin space, U+2009 ISOpub */

   154     "&ndash;",	"&#8211;",   "-", /* en dash, U+2013 ISOpub */

   155     "&mdash;",	"&#8212;",   "--", /* em dash, U+2014 ISOpub */

   156     "&rsquo;",	"&#8217;",   "'", /* right single quotation mark */

   157     "&sbquo;",	"&#8218;",   "'", /* single low-9 quotation mark */

   158     "&ldquo;",	"&#8220;",   "\"", /* left double quotation mark */

   159     "&rdquo;",	"&#8221;",   "\"", /* right double quotation mark */

   160     "&bdquo;",	"&#8222;",   "\"", /* double low-9 quotation mark */

   161     "&lsaquo;",	"&#8249;",   "\"", /* single left-pointing angle quotation mark */

   162     "&rsaquo;",	"&#8250;",   "\"", /* single right-pointing angle quotation mark */

   163     "&nbsp;",	"&#160;",    " ", /* no-break space = non-breaking space, */

   164     "&iexcl;",	"&#161;",    "!", /* inverted exclamation mark */

   165     "&cent;",	"&#162;",    "c", /* cent sign */

   166     "&pound;",	"&#163;",    "L", /* pound sign */

   167     "&curren;",	"&#164;",    "$", /* currency sign */

   168     "&yen;",	"&#165;",    "Y", /* yen sign = yuan sign */

   169     "&sect;",	"&#167;",    "--", /* section sign */

   170     "&uml;",	"&#168;",    " ", /* diaeresis = spacing diaeresis */

   171     "&copy;",	"&#169;",    "(C) ", /* copyright sign */

   172     "&ordf;",	"&#170;",    " ", /* feminine ordinal indicator */

   173     "&laquo;",	"&#171;",    "\"", /* left-pointing double angle quotation mark */

   174     "&shy;",	"&#173;",    "-", /* soft hyphen = discretionary hyphen */

   175     "&reg;",	"&#174;",    "(R) ", /* registered sign = registered trade mark sign */

   176     "&macr;",	"&#175;",    " ", /* macron = spacing macron = overline */

   177     "&deg;",	"&#176;",    " degrees", /* degree sign */

   178     "&plusmn;",	"&#177;",    "+-", /* plus-minus sign = plus-or-minus sign */

   179     "&sup2;",	"&#178;",    "2", /* superscript two = superscript digit two */

   180     "&sup3;",	"&#179;",    "3", /* superscript three = superscript digit three */

   181     "&acute;",	"&#180;",    " ", /* acute accent = spacing acute */

   182     "&micro;",	"&#181;",    "m", /* micro sign */

   183     "&para;",	"&#182;",    "--", /* pilcrow sign = paragraph sign */

   184     "&cedil;",	"&#184;",    " ", /* cedilla = spacing cedilla */

   185     "&sup1;",	"&#185;",    "1", /* superscript one = superscript digit one */

   186     "&ordm;",	"&#186;",    " ", /* masculine ordinal indicator */

   187     "&raquo;",	"&#187;",    "\"", /* right-pointing double angle quotation mark */

   188     "&frac14;",	"&#188;",    "1/4", /* vulgar fraction one quarter */

   189     "&frac12;",	"&#189;",    "1/2", /* vulgar fraction one half */

   190     "&frac34;",	"&#190;",    "3/4", /* vulgar fraction three quarters */

   191     "&iquest;",	"&#191;",    "?", /* inverted question mark */

   192     "&Agrave;",	"&#192;",    "A", /* latin capital letter A with grave */

   193     "&Aacute;",	"&#193;",    "A", /* latin capital letter A with acute */

   194     "&Acirc;",	"&#194;",    "A", /* latin capital letter A with circumflex */

   195     "&Atilde;",	"&#195;",    "A", /* latin capital letter A with tilde */

   196     "&Auml;",	"&#196;",    "A", /* latin capital letter A with diaeresis */

   197     "&Aring;",	"&#197;",    "A", /* latin capital letter A with ring above */

   198     "&AElig;",	"&#198;",    "AE", /* latin capital letter AE */

   199     "&Ccedil;",	"&#199;",    "C", /* latin capital letter C with cedilla */

   200     "&Egrave;",	"&#200;",    "E", /* latin capital letter E with grave */

   201     "&Eacute;",	"&#201;",    "E", /* latin capital letter E with acute */

   202     "&Ecirc;",	"&#202;",    "E", /* latin capital letter E with circumflex */

   203     "&Euml;",	"&#203;",    "E", /* latin capital letter E with diaeresis */

   204     "&Igrave;",	"&#204;",    "I", /* latin capital letter I with grave */

   205     "&Iacute;",	"&#205;",    "I", /* latin capital letter I with acute */

   206     "&Icirc;",	"&#206;",    "I", /* latin capital letter I with circumflex */

   207     "&Iuml;",	"&#207;",    "I", /* latin capital letter I with diaeresis */

   208     "&ETH;",	"&#208;",    "E", /* latin capital letter ETH */

   209     "&Ntilde;",	"&#209;",    "N", /* latin capital letter N with tilde */

   210     "&Ograve;",	"&#210;",    "O", /* latin capital letter O with grave */

   211     "&Oacute;",	"&#211;",    "O", /* latin capital letter O with acute */

   212     "&Ocirc;",	"&#212;",    "O", /* latin capital letter O with circumflex */

   213     "&Otilde;",	"&#213;",    "O", /* latin capital letter O with tilde */

   214     "&Ouml;",	"&#214;",    "O", /* latin capital letter O with diaeresis */

   215     "&times;",	"&#215;",    "*", /* multiplication sign */

   216     "&Oslash;",	"&#216;",    "O", /* latin capital letter O with stroke */

   217     "&Ugrave;",	"&#217;",    "U", /* latin capital letter U with grave */

   218     "&Uacute;",	"&#218;",    "U", /* latin capital letter U with acute */

   219     "&Ucirc;",	"&#219;",    "U", /* latin capital letter U with circumflex */

   220     "&Uuml;",	"&#220;",    "U", /* latin capital letter U with diaeresis */

   221     "&Yacute;",	"&#221;",    "Y", /* latin capital letter Y with acute */

   222     "&THORN;",	"&#222;",    "TH", /* latin capital letter THORN */

   223     "&szlig;",	"&#223;",    "sz", /* latin small letter sharp s = ess-zed */

   224     "&agrave;",	"&#224;",    "a", /* latin small letter a with grave */

   225     "&aacute;",	"&#225;",    "a", /* latin small letter a with acute */

   226     "&acirc;",	"&#226;",    "a", /* latin small letter a with circumflex */

   227     "&atilde;",	"&#227;",    "a", /* latin small letter a with tilde */

   228     "&auml;",	"&#228;",    "a", /* latin small letter a with diaeresis */

   229     "&aring;",	"&#229;",    "a", /* latin small letter a with ring above */

   230     "&aelig;",	"&#230;",    "ae", /* latin small letter ae */

   231     "&ccedil;",	"&#231;",    "c", /* latin small letter c with cedilla */

   232     "&egrave;",	"&#232;",    "e", /* latin small letter e with grave */

   233     "&eacute;",	"&#233;",    "e", /* latin small letter e with acute */

   234     "&ecirc;",	"&#234;",    "e", /* latin small letter e with circumflex */

   235     "&euml;",	"&#235;",    "e", /* latin small letter e with diaeresis */

   236     "&igrave;",	"&#236;",    "i", /* latin small letter i with grave */

   237     "&iacute;",	"&#237;",    "i", /* latin small letter i with acute */

   238     "&icirc;",	"&#238;",    "i", /* latin small letter i with circumflex */

   239     "&iuml;",	"&#239;",    "i", /* latin small letter i with diaeresis */

   240     "&eth;",	"&#240;",    "eth", /* latin small letter eth */

   241     "&ntilde;",	"&#241;",    "n", /* latin small letter n with tilde */

   242     "&ograve;",	"&#242;",    "o", /* latin small letter o with grave */

   243     "&oacute;",	"&#243;",    "o", /* latin small letter o with acute */

   244     "&ocirc;",	"&#244;",    "o", /* latin small letter o with circumflex */

   245     "&otilde;",	"&#245;",    "o", /* latin small letter o with tilde */

   246     "&ouml;",	"&#246;",    "o", /* latin small letter o with diaeresis */

   247     "&divide;",	"&#247;",    "/", /* division sign */

   248     "&oslash;",	"&#248;",    "o", /* latin small letter o with stroke */

   249     "&ugrave;",	"&#249;",    "u", /* latin small letter u with grave */

   250     "&uacute;",	"&#250;",    "u", /* latin small letter u with acute */

   251     "&ucirc;",	"&#251;",    "u", /* latin small letter u with circumflex */

   252     "&uuml;",	"&#252;",    "u", /* latin small letter u with diaeresis */

   253     "&yacute;",	"&#253;",    "y", /* latin small letter y with acute */

   254     "&thorn;",	"&#254;",    "th", /* latin small letter thorn */

   255     "&yuml;",	"&#255;",    "y", /* latin small letter y with diaeresis */

   256     "", ""

   257 };

   259 /* special characters */

   260 #define CHAR_SPACE        32

   261 #define CHAR_TAB           9

   262 #define CHAR_LF           10

   263 #define CHAR_CR           13

   264 #define CHAR_DQUOTE       34

   265 #define CHAR_SQUOTE       39

   266 #define CHAR_OPEN_SQUOTE  96

   267 #define CHAR_TILDE       126

   268 #define CHAR_ASTERISK     42

   269 #define CHAR_FORESLASH    47

   270 #define CHAR_CARAT        94

   272 #define CHAR_UNDERSCORE    '_'

   273 #define CHAR_OPEN_CBRACK   '{'

   274 #define CHAR_CLOSE_CBRACK  '}'

   275 #define CHAR_OPEN_RBRACK   '('

   276 #define CHAR_CLOSE_RBRACK  ')'

   277 #define CHAR_OPEN_SBRACK   '['

   278 #define CHAR_CLOSE_SBRACK  ']'

   280 /* longest and shortest normal PG line lengths */

   281 #define LONGEST_PG_LINE   75

   282 #define WAY_TOO_LONG      80

   283 #define SHORTEST_PG_LINE  55

   285 #define SWITCHES "ESTPXLOYHWVMUD" /* switches:-                            */

   286                                   /*     D - ignore DP-specific markup     */

   287                                   /*     E - echo queried line             */

   288                                   /*     S - check single quotes           */

   289                                   /*     T - check common typos            */

   290                                   /*     P - require closure of quotes on  */

   291                                   /*         every paragraph               */

   292                                   /*     X - "Trust no one" :-) Paranoid!  */

   293                                   /*         Queries everything            */

   294                                   /*     L - line end checking defaults on */

   295                                   /*         -L turns it off               */

   296                                   /*     O - overview. Just shows counts.  */

   297                                   /*     Y - puts errors to stdout         */

   298                                   /*         instead of stderr             */

   299                                   /*     H - Echoes header fields          */

   300                                   /*     M - Ignore markup in < >          */

   301                                   /*     U - Use file of User-defined Typos*/

   302                                   /*     W - Defaults for use on Web upload*/

   303                                   /*     V - Verbose - list EVERYTHING!    */

   304 #define SWITNO 14                 /* max number of switch parms            */

   305                                   /*        - used for defining array-size */

   306 #define MINARGS   1               /* minimum no of args excl switches      */

   307 #define MAXARGS   1               /* maximum no of args excl switches      */

   309 int pswit[SWITNO];                /* program switches set by SWITCHES      */

   311 #define ECHO_SWITCH      0

   312 #define SQUOTE_SWITCH    1

   313 #define TYPO_SWITCH      2

   314 #define QPARA_SWITCH     3

   315 #define PARANOID_SWITCH  4

   316 #define LINE_END_SWITCH  5

   317 #define OVERVIEW_SWITCH  6

   318 #define STDOUT_SWITCH    7

   319 #define HEADER_SWITCH    8

   320 #define WEB_SWITCH       9

   321 #define VERBOSE_SWITCH   10

   322 #define MARKUP_SWITCH    11

   323 #define USERTYPO_SWITCH  12

   324 #define DP_SWITCH        13

   326 long cnt_dquot;       /* for overview mode, count of doublequote queries */

   327 long cnt_squot;       /* for overview mode, count of singlequote queries */

   328 long cnt_brack;       /* for overview mode, count of brackets queries */

   329 long cnt_bin;         /* for overview mode, count of non-ASCII queries */

   330 long cnt_odd;         /* for overview mode, count of odd character queries */

   331 long cnt_long;        /* for overview mode, count of long line errors */

   332 long cnt_short;       /* for overview mode, count of short line queries */

   333 long cnt_punct;       /* for overview mode, count of punctuation and spacing queries */

   334 long cnt_dash;        /* for overview mode, count of dash-related queries */

   335 long cnt_word;        /* for overview mode, count of word queries */

   336 long cnt_html;        /* for overview mode, count of html queries */

   337 long cnt_lineend;     /* for overview mode, count of line-end queries */

   338 long cnt_spacend;     /* count of lines with space at end */

   339 long linecnt;         /* count of total lines in the file */

   340 long checked_linecnt; /* count of lines actually checked */

   342 void proghelp(void);

   343 void procfile(char *);

   345 #define LOW_THRESHOLD    0

   346 #define HIGH_THRESHOLD   1

   348 #define START 0

   349 #define END 1

   350 #define PREV 0

   351 #define NEXT 1

   352 #define FIRST_OF_PAIR 0

   353 #define SECOND_OF_PAIR 1

   355 #define MAX_WORDPAIR 1000

   357 char running_from[MAX_PATH];

   359 int mixdigit(char *);

   360 char *getaword(char *,char *);

   361 int matchword(char *,char *);

   362 char *flgets(char *,int,FILE *,long);

   363 void lowerit(char *);

   364 int gcisalpha(unsigned char);

   365 int gcisdigit(unsigned char);

   366 int gcisletter(unsigned char);

   367 char *gcstrchr(char *s,char c);

   368 void postprocess_for_HTML(char *);

   369 char *linehasmarkup(char *);

   370 char *losemarkup(char *);

   371 int tagcomp(char *,char *);

   372 char *loseentities(char *);

   373 int isroman(char *);

   374 int usertypo_count;

   375 void postprocess_for_DP(char *);

   377 char wrk[LINEBUFSIZE];

   379 #define MAX_QWORD 50

   380 #define MAX_QWORD_LENGTH 40

   381 char qword[MAX_QWORD][MAX_QWORD_LENGTH];

   382 char qperiod[MAX_QWORD][MAX_QWORD_LENGTH];

   383 signed int dupcnt[MAX_QWORD];

   385 int main(int argc,char **argv)

   386 {

   387     char *argsw,*s;

   388     int i,switno,invarg;

   389     char usertypo_file[MAX_PATH];

   390     FILE *usertypofile;

   391     if (strlen(argv[0])<sizeof(running_from))

   392 	/* save the path to the executable */

   393         strcpy(running_from,argv[0]);

   394     /* find out what directory we're running from */

   395     s=running_from+strlen(running_from);

   396     for (;*s!='/' && *s!='\\' && s>=running_from;s--)

   397         *s=0;

   398     switno=strlen(SWITCHES);

   399     for (i=switno;--i>0;)

   400         pswit[i]=0;           /* initialise switches */

   401     /*

   402      * Standard loop to extract switches.

   403      * When we come out of this loop, the arguments will be

   404      * in argv[0] upwards and the switches used will be

   405      * represented by their equivalent elements in pswit[]

   406      */

   407     while (--argc>0 && **++argv=='-')

   408         for (argsw=argv[0]+1;*argsw!='\0';argsw++)

   409             for (i=switno,invarg=1;(--i>=0) && invarg==1;)

   410                 if ((toupper(*argsw))==SWITCHES[i])

   411 		{

   412                     invarg=0;

   413                     pswit[i]=1;

   414 		}

   415     /* Paranoid checking is turned OFF, not on, by its switch */

   416     pswit[PARANOID_SWITCH]^=1;

   417     if (pswit[PARANOID_SWITCH])

   418 	/* if running in paranoid mode force typo checks as well   */

   419         pswit[TYPO_SWITCH]=pswit[TYPO_SWITCH]^1;

   420     /* Line-end checking is turned OFF, not on, by its switch */

   421     pswit[LINE_END_SWITCH]^=1;

   422     /* Echoing is turned OFF, not on, by its switch */

   423     pswit[ECHO_SWITCH]^=1;

   424     if (pswit[OVERVIEW_SWITCH])

   425 	/* just print summary; don't echo */

   426         pswit[ECHO_SWITCH]=0;

   427     /*

   428      * Web uploads - for the moment, this is really just a placeholder

   429      * until we decide what processing we really want to do on web uploads

   430      */

   431     if (pswit[WEB_SWITCH])

   432     {

   433 	/* specific override for web uploads */

   434         pswit[ECHO_SWITCH]=1;

   435         pswit[SQUOTE_SWITCH]=0;

   436         pswit[TYPO_SWITCH]=1;

   437         pswit[QPARA_SWITCH]=0;

   438         pswit[PARANOID_SWITCH]=1;

   439         pswit[LINE_END_SWITCH]=0;

   440         pswit[OVERVIEW_SWITCH]=0;

   441         pswit[STDOUT_SWITCH]=0;

   442         pswit[HEADER_SWITCH]=1;

   443         pswit[VERBOSE_SWITCH]=0;

   444         pswit[MARKUP_SWITCH]=0;

   445         pswit[USERTYPO_SWITCH]=0;

   446         pswit[DP_SWITCH]=0;

   447     }

   448     if (argc<MINARGS || argc>MAXARGS)

   449     {

   450 	/* check number of args */

   451         proghelp();

   452         return 1;

   453     }

   454     /* read in the user-defined stealth scanno list */

   455     if (pswit[USERTYPO_SWITCH])

   456     {

   457 	/* ... we were told we had one! */

   458         usertypofile=fopen(USERTYPO_FILE,"rb");

   459         if (!usertypofile)

   460 	{

   461 	    /* not in cwd. try excuteable directory. */

   462             strcpy(usertypo_file,running_from);

   463             strcat(usertypo_file,USERTYPO_FILE);

   464             usertypofile=fopen(usertypo_file,"rb");

   465             if (!usertypofile) {

   466 		/* we ain't got no user typo file! */

   467                 printf("   --> I couldn't find gutcheck.typ "

   468 		  "-- proceeding without user typos.\n");

   469 	    }

   470 	}

   471         usertypo_count=0;

   472         if (usertypofile)

   473 	{

   474 	    /* we managed to open a User Typo File! */

   475             if (pswit[USERTYPO_SWITCH])

   476 	    {

   477                 while (flgets(aline,LINEBUFSIZE-1,usertypofile,

   478 		  (long)usertypo_count))

   479 		{

   480                     if (strlen(aline)>1)

   481 		    {

   482                         if ((int)*aline>33)

   483 			{

   484                             s=malloc(strlen(aline)+1);

   485                             if (!s)

   486 			    {

   487                                 fprintf(stderr,"bookloupe: cannot get enough "

   488 				  "memory for user typo file!\n");

   489                                 exit(1);

   490 			    }

   491                             strcpy(s,aline);

   492                             usertypo[usertypo_count]=s;

   493                             usertypo_count++;

   494                             if (usertypo_count>=MAX_USER_TYPOS)

   495 			    {

   496                                 printf("   --> Only %d user-defined typos "

   497 				  "allowed: ignoring the rest\n",

   498 				  MAX_USER_TYPOS);

   499                                 break;

   500 			    }

   501 			}

   502 		    }

   503 		}

   504 	    }

   505             fclose(usertypofile);

   506 	}

   507     }

   508     fprintf(stderr,"bookloupe: Check and report on an e-text\n");

   509     cnt_dquot=cnt_squot=cnt_brack=cnt_bin=cnt_odd=cnt_long=

   510     cnt_short=cnt_punct=cnt_dash=cnt_word=cnt_html=cnt_lineend=

   511     cnt_spacend=0;

   512     procfile(argv[0]);

   513     if (pswit[OVERVIEW_SWITCH])

   514     {

   515 	printf("    Checked %ld lines of %ld (head+foot = %ld)\n\n",

   516 	  checked_linecnt,linecnt,linecnt-checked_linecnt);

   517         printf("    --------------- Queries found --------------\n");

   518         if (cnt_long)

   519 	    printf("    Long lines:                    %14ld\n",cnt_long);

   520         if (cnt_short)

   521 	    printf("    Short lines:                   %14ld\n",cnt_short);

   522         if (cnt_lineend)

   523 	    printf("    Line-end problems:             %14ld\n",cnt_lineend);

   524         if (cnt_word)

   525 	    printf("    Common typos:                  %14ld\n",cnt_word);

   526         if (cnt_dquot)

   527 	    printf("    Unmatched quotes:              %14ld\n",cnt_dquot);

   528         if (cnt_squot)

   529 	    printf("    Unmatched SingleQuotes:        %14ld\n",cnt_squot);

   530         if (cnt_brack)

   531 	    printf("    Unmatched brackets:            %14ld\n",cnt_brack);

   532         if (cnt_bin)

   533 	    printf("    Non-ASCII characters:          %14ld\n",cnt_bin);

   534         if (cnt_odd)

   535 	    printf("    Proofing characters:           %14ld\n",cnt_odd);

   536         if (cnt_punct)

   537 	    printf("    Punctuation & spacing queries: %14ld\n",cnt_punct);

   538         if (cnt_dash)

   539 	    printf("    Non-standard dashes:           %14ld\n",cnt_dash);

   540         if (cnt_html)

   541 	    printf("    Possible HTML tags:            %14ld\n",cnt_html);

   542         printf("\n");

   543         printf("    TOTAL QUERIES                  %14ld\n",

   544           cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+

   545           cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);

   546     }

   547     return 0;

   548 }

   550 struct first_pass_results {

   551     long firstline,astline;

   552     long footerline,totlen,binlen,alphalen,endquote_count,shortline,dotcomma;

   553     long fslashline,hyphens,longline,verylongline,htmcount,standalone_digit;

   554     long spacedash,emdash,space_emdash,non_PG_space_emdash,PG_space_emdash;

   555     signed int Dutchcount,Frenchcount;

   556 };

   558 /*

   559  * first_pass:

   560  *

   561  * Run a first pass - verify that it's a valid PG

   562  * file, decide whether to report some things that

   563  * occur many times in the text like long or short

   564  * lines, non-standard dashes, etc.

   565  */

   566 struct first_pass_results *first_pass(FILE *infile)

   567 {

   568     char laststart=CHAR_SPACE,*s;

   569     signed int i,llen;

   570     unsigned int lastlen=0,lastblen=0;

   571     long spline=0,nspline=0;

   572     static struct first_pass_results results={0};

   573     char inword[MAXWORDLEN]="";

   574     while (fgets(aline,LINEBUFSIZE-1,infile))

   575     {

   576         while (aline[strlen(aline)-1]==10 || aline[strlen(aline)-1]==13)

   577 	    aline[strlen(aline)-1]=0;

   578         linecnt++;

   579         if (strstr(aline,"*END") && strstr(aline,"SMALL PRINT") &&

   580 	  (strstr(aline,"PUBLIC DOMAIN") || strstr(aline,"COPYRIGHT")))

   581 	{

   582             if (spline)

   583                 printf("   --> Duplicate header?\n");

   584             spline=linecnt+1;   /* first line of non-header text, that is */

   585 	}

   586         if (!strncmp(aline,"*** START",9) && strstr(aline,"PROJECT GUTENBERG"))

   587 	{

   588             if (nspline)

   589                 printf("   --> Duplicate header?\n");

   590             nspline=linecnt+1;   /* first line of non-header text, that is */

   591 	}

   592         if (spline || nspline)

   593 	{

   594             lowerit(aline);

   595             if (strstr(aline,"end") && strstr(aline,"project gutenberg"))

   596 	    {

   597                 if (strstr(aline,"end")<strstr(aline,"project gutenberg"))

   598 		{

   599                     if (results.footerline)

   600 		    {

   601 			/* it's an old-form header - we can detect duplicates */

   602                         if (!nspline)

   603                             printf("   --> Duplicate footer?\n");

   604 		    }

   605                     else

   606                         results.footerline=linecnt;

   607 		}

   608 	    }

   609 	}

   610         if (spline)

   611 	    results.firstline=spline;

   612         if (nspline)

   613 	    results.firstline=nspline;  /* override with new */

   614         if (results.footerline)

   615 	    continue;    /* don't count the boilerplate in the footer */

   616         llen=strlen(aline);

   617         results.totlen+=llen;

   618         for (i=0;i<llen;i++)

   619 	{

   620             if ((unsigned char)aline[i]>127)

   621 		results.binlen++;

   622             if (gcisalpha(aline[i]))

   623 		results.alphalen++;

   624             if (i>0 && aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))

   625 		results.endquote_count++;

   626 	}

   627         if (strlen(aline)>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE &&

   628 	  lastblen>2 && lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)

   629 	    results.shortline++;

   630         if (*aline && (unsigned char)aline[strlen(aline)-1]<=CHAR_SPACE)

   631 	    cnt_spacend++;

   632         if (strstr(aline,".,"))

   633 	    results.dotcomma++;

   634         /* only count ast lines for ignoring purposes where there is */

   635         /* locase text on the line */

   636         if (strstr(aline,"*"))

   637 	{

   638             for (s=aline;*s;s++)

   639                 if (*s>='a' && *s<='z')

   640                     break;

   641              if (*s)

   642 		results.astline++;

   643 	}

   644         if (strstr(aline,"/"))

   645             results.fslashline++;

   646         for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)

   647 	    ;

   648         if (aline[i]=='-' && aline[i-1]!='-')

   649 	    results.hyphens++;

   650         if (llen>LONGEST_PG_LINE)

   651 	    results.longline++;

   652         if (llen>WAY_TOO_LONG)

   653 	    results.verylongline++;

   654         if (strstr(aline,"<") && strstr(aline,">"))

   655 	{

   656             i=(signed int)(strstr(aline,">")-strstr(aline,"<")+1);

   657             if (i>0)

   658                 results.htmcount++;

   659             if (strstr(aline,"<i>"))

   660 		results.htmcount+=4; /* bonus marks! */

   661 	}

   662         /* Check for spaced em-dashes */

   663         if (strstr(aline,"--"))

   664 	{

   665             results.emdash++;

   666             if (*(strstr(aline,"--")-1)==CHAR_SPACE ||

   667                (*(strstr(aline,"--")+2)==CHAR_SPACE))

   668 		results.space_emdash++;

   669             if (*(strstr(aline,"--")-1)==CHAR_SPACE &&

   670                (*(strstr(aline,"--")+2)==CHAR_SPACE))

   671 		/* count of em-dashes with spaces both sides */

   672 		results.non_PG_space_emdash++;

   673             if (*(strstr(aline,"--")-1)!=CHAR_SPACE &&

   674                (*(strstr(aline,"--")+2)!=CHAR_SPACE))

   675 		/* count of PG-type em-dashes with no spaces */

   676 		results.PG_space_emdash++;

   677 	}

   678         for (s=aline;*s;)

   679 	{

   680             s=getaword(s,inword);

   681             if (!strcmp(inword,"hij") || !strcmp(inword,"niet"))

   682                 results.Dutchcount++;

   683             if (!strcmp(inword,"dans") || !strcmp(inword,"avec"))

   684                 results.Frenchcount++;

   685             if (!strcmp(inword,"0") || !strcmp(inword,"1"))

   686                 results.standalone_digit++;

   687 	}

   688         /* Check for spaced dashes */

   689         if (strstr(aline," -") && *(strstr(aline," -")+2)!='-')

   690 	    results.spacedash++;

   691         lastblen=lastlen;

   692         lastlen=strlen(aline);

   693         laststart=aline[0];

   694     }

   695     return &results;

   696 }

   698 struct warnings {

   699     signed int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen;

   700     signed int endquote,isDutch,isFrench;

   701 };

   703 /*

   704  * report_first_pass:

   705  *

   706  * Make some snap decisions based on the first pass results.

   707  */

   708 struct warnings *report_first_pass(struct first_pass_results *results)

   709 {

   710     static struct warnings warnings={0};

   711     if (cnt_spacend>0)

   712         printf("   --> %ld lines in this file have white space at end\n",

   713 	  cnt_spacend);

   714     warnings.dotcomma=1;

   715     if (results->dotcomma>5)

   716     {

   717         warnings.dotcomma=0;

   718         printf("   --> %ld lines in this file contain '.,'. "

   719 	  "Not reporting them.\n",results->dotcomma);

   720     }

   721     /*

   722      * If more than 50 lines, or one-tenth, are short,

   723      * don't bother reporting them.

   724      */

   725     warnings.shortline=1;

   726     if (results->shortline>50 || results->shortline*10>linecnt)

   727     {

   728         warnings.shortline=0;

   729         printf("   --> %ld lines in this file are short. "

   730 	  "Not reporting short lines.\n",results->shortline);

   731     }

   732     /*

   733      * If more than 50 lines, or one-tenth, are long,

   734      * don't bother reporting them.

   735      */

   736     warnings.longline=1;

   737     if (results->longline>50 || results->longline*10>linecnt)

   738     {

   739         warnings.longline=0;

   740         printf("   --> %ld lines in this file are long. "

   741 	  "Not reporting long lines.\n",results->longline);

   742     }

   743     /* If more than 10 lines contain asterisks, don't bother reporting them. */

   744     warnings.ast=1;

   745     if (results->astline>10)

   746     {

   747         warnings.ast=0;

   748         printf("   --> %ld lines in this file contain asterisks. "

   749 	  "Not reporting them.\n",results->astline);

   750     }

   751     /*

   752      * If more than 10 lines contain forward slashes,

   753      * don't bother reporting them.

   754      */

   755     warnings.fslash=1;

   756     if (results->fslashline>10)

   757     {

   758         warnings.fslash=0;

   759         printf("   --> %ld lines in this file contain forward slashes. "

   760 	  "Not reporting them.\n",results->fslashline);

   761     }

   762     /*

   763      * If more than 20 lines contain unpunctuated endquotes,

   764      * don't bother reporting them.

   765      */

   766     warnings.endquote=1;

   767     if (results->endquote_count>20)

   768     {

   769         warnings.endquote=0;

   770         printf("   --> %ld lines in this file contain unpunctuated endquotes. "

   771 	  "Not reporting them.\n",results->endquote_count);

   772     }

   773     /*

   774      * If more than 15 lines contain standalone digits,

   775      * don't bother reporting them.

   776      */

   777     warnings.digit=1;

   778     if (results->standalone_digit>10)

   779     {

   780         warnings.digit=0;

   781         printf("   --> %ld lines in this file contain standalone 0s and 1s. "

   782 	  "Not reporting them.\n",results->standalone_digit);

   783     }

   784     /*

   785      * If more than 20 lines contain hyphens at end,

   786      * don't bother reporting them.

   787      */

   788     warnings.hyphen=1;

   789     if (results->hyphens>20)

   790     {

   791         warnings.hyphen=0;

   792         printf("   --> %ld lines in this file have hyphens at end. "

   793 	  "Not reporting them.\n",results->hyphens);

   794     }

   795     if (results->htmcount>20 && !pswit[MARKUP_SWITCH])

   796     {

   797         printf("   --> Looks like this is HTML. Switching HTML mode ON.\n");

   798         pswit[MARKUP_SWITCH]=1;

   799     }

   800     if (results->verylongline>0)

   801         printf("   --> %ld lines in this file are VERY long!\n",

   802 	  results->verylongline);

   803     /*

   804      * If there are more non-PG spaced dashes than PG em-dashes,

   805      * assume it's deliberate.

   806      * Current PG guidelines say don't use them, but older texts do,

   807      * and some people insist on them whatever the guidelines say.

   808      */

   809     warnings.dash=1;

   810     if (results->spacedash+results->non_PG_space_emdash>

   811       results->PG_space_emdash)

   812     {

   813         warnings.dash=0;

   814         printf("   --> There are %ld spaced dashes and em-dashes. "

   815 	  "Not reporting them.\n",

   816 	  results->spacedash+results->non_PG_space_emdash);

   817     }

   818     /* If more than a quarter of characters are hi-bit, bug out. */

   819     warnings.bin=1;

   820     if (results->binlen*4>results->totlen)

   821     {

   822         printf("   --> This file does not appear to be ASCII. "

   823 	  "Terminating. Best of luck with it!\n");

   824         exit(1);

   825     }

   826     if (results->alphalen*4<results->totlen)

   827     {

   828         printf("   --> This file does not appear to be text. "

   829 	  "Terminating. Best of luck with it!\n");

   830         exit(1);

   831     }

   832     if (results->binlen*100>results->totlen || results->binlen>100)

   833     {

   834         printf("   --> There are a lot of foreign letters here. "

   835 	  "Not reporting them.\n");

   836         warnings.bin=0;

   837     }

   838     warnings.isDutch=0;

   839     if (results->Dutchcount>50)

   840     {

   841         warnings.isDutch=1;

   842         printf("   --> This looks like Dutch - "

   843 	  "switching off dashes and warnings for 's Middags case.\n");

   844     }

   845     warnings.isFrench=0;

   846     if (results->Frenchcount>50)

   847     {

   848         warnings.isFrench=1;

   849         printf("   --> This looks like French - "

   850 	  "switching off some doublepunct.\n");

   851     }

   852     if (results->firstline && results->footerline)

   853         printf("    The PG header and footer appear to be already on.\n");

   854     else

   855     {

   856         if (results->firstline)

   857             printf("    The PG header is on - no footer.\n");

   858         if (results->footerline)

   859             printf("    The PG footer is on - no header.\n");

   860     }

   861     printf("\n");

   862     if (pswit[VERBOSE_SWITCH])

   863     {

   864         warnings.bin=1;

   865         warnings.shortline=1;

   866         warnings.dotcomma=1;

   867         warnings.longline=1;

   868         warnings.dash=1;

   869         warnings.digit=1;

   870         warnings.ast=1;

   871         warnings.fslash=1;

   872         warnings.hyphen=1;

   873         warnings.endquote=1;

   874         printf("   *** Verbose output is ON -- you asked for it! ***\n");

   875     }

   876     if (warnings.isDutch)

   877         warnings.dash=0;

   878     if (results->footerline>0 && results->firstline>0 &&

   879       results->footerline>results->firstline &&

   880       results->footerline-results->firstline<100)

   881     {

   882         printf("   --> I don't really know where this text starts. \n");

   883         printf("       There are no reference points.\n");

   884         printf("       I'm going to have to report the header and footer "

   885 	  "as well.\n");

   886         results->firstline=0;

   887     }

   888     return &warnings;

   889 }

   891 struct counters {

   892     long quot;

   893     signed int c_unders,c_brack,s_brack,r_brack;

   894     signed int open_single_quote,close_single_quote;

   895 };

   897 /*

   898  * analyse_quotes:

   899  *

   900  * Look along the line, accumulate the count of quotes, and see

   901  * if this is an empty line - i.e. a line with nothing on it

   902  * but spaces.

   903  * If line has just spaces, period, * and/or - on it, don't

   904  * count it, since empty lines with asterisks or dashes to

   905  * separate sections are common.

   906  *

   907  * Returns: Non-zero if the line is empty.

   908  */

   909 int analyse_quotes(const char *s,struct counters *counters)

   910 {

   911     signed int guessquote=0;

   912     int isemptyline=1;    /* assume the line is empty until proven otherwise */

   913     while (*s)

   914     {

   915 	if (*s==CHAR_DQUOTE)

   916 	    counters->quot++;

   917 	if (*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE)

   918 	{

   919 	    if (s==aline)

   920 	    {

   921 		/*

   922 		 * At start of line, it can only be an openquote.

   923 		 * Hardcode a very common exception!

   924 		 */

   925 		if (strncmp(s+2,"tis",3) && strncmp(s+2,"Tis",3))

   926 		    counters->open_single_quote++;

   927 	    }

   928 	    else if (gcisalpha(s[-1]) && gcisalpha(s[1]))

   929 		/* Do nothing! it's definitely an apostrophe, not a quote */

   930 		;

   931 	    /* it's outside a word - let's check it out */

   932 	    else if (*s==CHAR_OPEN_SQUOTE || gcisalpha(s[1]))

   933 	    {

   934 		/* it damwell better BE an openquote */

   935 		if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3))

   936 		    /* hardcode a very common exception! */

   937 		    counters->open_single_quote++;

   938 	    }

   939 	    else

   940 	    {

   941 		/* now - is it a closequote? */

   942 		guessquote=0;   /* accumulate clues */

   943 		if (gcisalpha(s[-1]))

   944 		{

   945 		    /* it follows a letter - could be either */

   946 		    guessquote++;

   947 		    if (s[-1]=='s')

   948 		    {

   949 			/* looks like a plural apostrophe */

   950 			guessquote-=3;

   951 			if (s[1]==CHAR_SPACE)  /* bonus marks! */

   952 			    guessquote-=2;

   953 		    }

   954 		}

   955 		/* it doesn't have a letter either side */

   956 		else if (strchr(".?!,;:",s[-1]) && strchr(".?!,;: ",s[1]))

   957 		    guessquote+=8; /* looks like a closequote */

   958 		else

   959 		    guessquote++;

   960 		if (counters->open_single_quote>counters->close_single_quote)

   961 		    /*

   962 		     * Give it the benefit of some doubt,

   963 		     * if a squote is already open.

   964 		     */

   965 		    guessquote++;

   966 		else

   967 		    guessquote--;

   968 		if (guessquote>=0)

   969 		    counters->close_single_quote++;

   970 	    }

   971 	}

   972 	if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK &&

   973 	  *s!=13 && *s!=10)

   974 	    isemptyline=0;  /* ignore lines like  *  *  *  as spacers */

   975 	if (*s==CHAR_UNDERSCORE)

   976 	    counters->c_unders++;

   977 	if (*s==CHAR_OPEN_CBRACK)

   978 	    counters->c_brack++;

   979 	if (*s==CHAR_CLOSE_CBRACK)

   980 	    counters->c_brack--;

   981 	if (*s==CHAR_OPEN_RBRACK)

   982 	    counters->r_brack++;

   983 	if (*s==CHAR_CLOSE_RBRACK)

   984 	    counters->r_brack--;

   985 	if (*s==CHAR_OPEN_SBRACK)

   986 	    counters->s_brack++;

   987 	if (*s==CHAR_CLOSE_SBRACK)

   988 	    counters->s_brack--;

   989 	s++;

   990     }

   991     return isemptyline;

   992 }

   994 /*

   995  * check_for_odd_characters:

   996  *

   997  * Check for binary and other odd characters.

   998  */

   999 void check_for_odd_characters(const char *aline,const struct warnings *warnings,

  1000   int isemptyline)

  1001 {

  1002     /* Don't repeat multiple warnings on one line. */

  1003     signed int eNon_A=0,eTab=0,eTilde=0,eCarat=0,eFSlash=0,eAst=0;

  1004     const char *s;

  1005     unsigned char c;

  1006     for (s=aline;*s;s++)

  1007     {

  1008 	c=*(unsigned char *)s;

  1009 	if (!eNon_A && (*s<CHAR_SPACE && *s!=9 && *s!='\n' || c>127))

  1010 	{

  1011 	    if (pswit[ECHO_SWITCH])

  1012 		printf("\n%s\n",aline);

  1013 	    if (!pswit[OVERVIEW_SWITCH])

  1014 		if (c>127 && c<160)

  1015 		    printf("    Line %ld column %d - "

  1016 		      "Non-ISO-8859 character %d\n",linecnt,(int)(s-aline)+1,c);

  1017 		else

  1018 		    printf("    Line %ld column %d - Non-ASCII character %d\n",

  1019 		      linecnt,(int)(s-aline)+1,c);

  1020 	    else

  1021 		cnt_bin++;

  1022 	    eNon_A=1;

  1023 	}

  1024 	if (!eTab && *s==CHAR_TAB)

  1025 	{

  1026 	    if (pswit[ECHO_SWITCH])

  1027 		printf("\n%s\n",aline);

  1028 	    if (!pswit[OVERVIEW_SWITCH])

  1029 		printf("    Line %ld column %d - Tab character?\n",

  1030 		  linecnt,(int)(s-aline)+1);

  1031 	    else

  1032 		cnt_odd++;

  1033 	    eTab=1;

  1034 	}

  1035 	if (!eTilde && *s==CHAR_TILDE)

  1036 	{

  1037 	    /*

  1038 	     * Often used by OCR software to indicate an

  1039 	     * unrecognizable character.

  1040 	     */

  1041 	    if (pswit[ECHO_SWITCH])

  1042 		printf("\n%s\n",aline);

  1043 	    if (!pswit[OVERVIEW_SWITCH])

  1044 		printf("    Line %ld column %d - Tilde character?\n",

  1045 		  linecnt,(int)(s-aline)+1);

  1046 	    else

  1047 		cnt_odd++;

  1048 	    eTilde=1;

  1049 	}

  1050 	if (!eCarat && *s==CHAR_CARAT)

  1051 	{

  1052 	    if (pswit[ECHO_SWITCH])

  1053 		printf("\n%s\n",aline);

  1054 	    if (!pswit[OVERVIEW_SWITCH])

  1055 		printf("    Line %ld column %d - Carat character?\n",

  1056 		  linecnt,(int)(s-aline)+1);

  1057 	    else

  1058 		cnt_odd++;

  1059 	    eCarat=1;

  1060 	}

  1061 	if (!eFSlash && *s==CHAR_FORESLASH && warnings->fslash)

  1062 	{

  1063 	    if (pswit[ECHO_SWITCH])

  1064 		printf("\n%s\n",aline);

  1065 	    if (!pswit[OVERVIEW_SWITCH])

  1066 		printf("    Line %ld column %d - Forward slash?\n",

  1067 		  linecnt,(int)(s-aline)+1);

  1068 	    else

  1069 		cnt_odd++;

  1070 	    eFSlash=1;

  1071 	}

  1072 	/*

  1073 	 * Report asterisks only in paranoid mode,

  1074 	 * since they're often deliberate.

  1075 	 */

  1076 	if (!eAst && pswit[PARANOID_SWITCH] && warnings->ast && !isemptyline &&

  1077 	  *s==CHAR_ASTERISK)

  1078 	{

  1079 	    if (pswit[ECHO_SWITCH])

  1080 		printf("\n%s\n",aline);

  1081 	    if (!pswit[OVERVIEW_SWITCH])

  1082 		printf("    Line %ld column %d - Asterisk?\n",

  1083 		  linecnt,(int)(s-aline)+1);

  1084 	    else

  1085 		cnt_odd++;

  1086 	    eAst=1;

  1087 	}

  1088     }

  1089 }

  1091 /*

  1092  * check_for_long_line:

  1093  *

  1094  * Check for line too long.

  1095  */

  1096 void check_for_long_line(const char *aline)

  1097 {

  1098     if (strlen(aline)>LONGEST_PG_LINE)

  1099     {

  1100 	if (pswit[ECHO_SWITCH])

  1101 	    printf("\n%s\n",aline);

  1102 	if (!pswit[OVERVIEW_SWITCH])

  1103 	    printf("    Line %ld column %d - Long line %d\n",

  1104 	      linecnt,strlen(aline),strlen(aline));

  1105 	else

  1106 	    cnt_long++;

  1107     }

  1108 }

  1110 struct line_properties {

  1111     unsigned int len,blen;

  1112     char start;

  1113 };

  1115 /*

  1116  * check_for_short_line:

  1117  *

  1118  * Check for line too short.

  1119  *

  1120  * This one is a bit trickier to implement: we don't want to

  1121  * flag the last line of a paragraph for being short, so we

  1122  * have to wait until we know that our current line is a

  1123  * "normal" line, then report the _previous_ line if it was too

  1124  * short. We also don't want to report indented lines like

  1125  * chapter heads or formatted quotations. We therefore keep

  1126  * last->len as the length of the last line examined, and

  1127  * last->blen as the length of the last but one, and try to

  1128  * suppress unnecessary warnings by checking that both were of

  1129  * "normal" length. We keep the first character of the last

  1130  * line in last->start, and if it was a space, we assume that

  1131  * the formatting is deliberate. I can't figure out a way to

  1132  * distinguish something like a quoted verse left-aligned or

  1133  * the header or footer of a letter from a paragraph of short

  1134  * lines - maybe if I examined the whole paragraph, and if the

  1135  * para has less than, say, 8 lines and if all lines are short,

  1136  * then just assume it's OK? Need to look at some texts to see

  1137  * how often a formula like this would get the right result.

  1138  */

  1139 void check_for_short_line(const char *aline,const struct line_properties *last)

  1140 {

  1141     if (strlen(aline)>1 && last->len>1 && last->len<SHORTEST_PG_LINE &&

  1142       last->blen>1 && last->blen>SHORTEST_PG_LINE && last->start!=CHAR_SPACE)

  1143     {

  1144 	if (pswit[ECHO_SWITCH])

  1145 	    printf("\n%s\n",prevline);

  1146 	if (!pswit[OVERVIEW_SWITCH])

  1147 	    printf("    Line %ld column %d - Short line %d?\n",

  1148 	      linecnt-1,strlen(prevline),strlen(prevline));

  1149 	else

  1150 	    cnt_short++;

  1151     }

  1152 }

  1154 /*

  1155  * procfile:

  1156  *

  1157  * Process one file.

  1158  */

  1159 void procfile(char *filename)

  1160 {

  1161     char *s,*t,*s1,*wordstart;

  1162     char inword[MAXWORDLEN],testword[MAXWORDLEN];

  1163     char parastart[81];     /* first line of current para */

  1164     FILE *infile;

  1165     struct first_pass_results *first_pass_results;

  1166     struct warnings *warnings;

  1167     struct counters counters={0};

  1168     struct line_properties last={0};

  1169     int isemptyline;

  1170     long squot,start_para_line;

  1171     signed int i,j,llen,isacro,isellipsis,istypo,alower;

  1172     signed int dquotepar,squotepar;

  1173     signed int isnewpara,vowel,consonant;

  1174     char dquote_err[80],squote_err[80],rbrack_err[80],sbrack_err[80],

  1175       cbrack_err[80],unders_err[80];

  1176     signed int qword_index,qperiod_index,isdup;

  1177     signed int enddash;

  1178     last.start=CHAR_SPACE;

  1179     *dquote_err=*squote_err=*rbrack_err=*cbrack_err=*sbrack_err=

  1180       *unders_err=*prevline=0;

  1181     linecnt=checked_linecnt=start_para_line=0;

  1182     squot=0;

  1183     i=llen=isacro=isellipsis=istypo=0;

  1184     isnewpara=vowel=consonant=enddash=0;

  1185     qword_index=qperiod_index=isdup=0;

  1186     *inword=*testword=0;

  1187     dquotepar=squotepar=0;

  1188     for (j=0;j<MAX_QWORD;j++)

  1189     {

  1190         dupcnt[j]=0;

  1191         for (i=0;i<MAX_QWORD_LENGTH;i++)

  1192 	{

  1193             qword[i][j]=0;

  1194             qperiod[i][j]=0;

  1195 	}

  1196     }

  1197     infile=fopen(filename,"rb");

  1198     if (!infile)

  1199     {

  1200         if (pswit[STDOUT_SWITCH])

  1201             fprintf(stdout,"bookloupe: cannot open %s\n",filename);

  1202         else

  1203             fprintf(stderr,"bookloupe: cannot open %s\n",filename);

  1204 	exit(1);

  1205     }

  1206     fprintf(stdout,"\n\nFile: %s\n\n",filename);

  1207     first_pass_results=first_pass(infile);

  1208     warnings=report_first_pass(first_pass_results);

  1209     rewind(infile);

  1210     /*

  1211      * Here we go with the main pass. Hold onto yer hat!

  1212      * Re-init some variables we've dirtied.

  1213      */

  1214     squot=linecnt=0;

  1215     while (flgets(aline,LINEBUFSIZE-1,infile,linecnt+1))

  1216     {

  1217         linecnt++;

  1218         if (linecnt==1)

  1219 	    isnewpara=1;

  1220         if (pswit[DP_SWITCH] && !strncmp(aline,"-----File: ",11))

  1221 	    continue;    // skip DP page separators completely

  1222         if (linecnt<first_pass_results->firstline ||

  1223 	  (first_pass_results->footerline>0 &&

  1224 	  linecnt>first_pass_results->footerline))

  1225 	{

  1226             if (pswit[HEADER_SWITCH])

  1227 	    {

  1228                 if (!strncmp(aline,"Title:",6))

  1229                     printf("    %s\n",aline);

  1230                 if (!strncmp(aline,"Author:",7))

  1231                     printf("    %s\n",aline);

  1232                 if (!strncmp(aline,"Release Date:",13))

  1233                     printf("    %s\n",aline);

  1234                 if (!strncmp(aline,"Edition:",8))

  1235                     printf("    %s\n\n",aline);

  1236 	    }

  1237             continue;                /* skip through the header */

  1238 	}

  1239         checked_linecnt++;

  1240         s=aline;

  1241         /*

  1242 	 * If we are in a state of unbalanced quotes, and this line

  1243          * doesn't begin with a quote, output the stored error message.

  1244          * If the -P switch was used, print the warning even if the

  1245          * new para starts with quotes.

  1246 	 */

  1247         t=s;

  1248         while (*t==' ')

  1249 	    t++;

  1250         if (*dquote_err)

  1251             if (*t!=CHAR_DQUOTE || pswit[QPARA_SWITCH])

  1252 	    {

  1253                 if (!pswit[OVERVIEW_SWITCH])

  1254 		{

  1255                     if (pswit[ECHO_SWITCH])

  1256 			printf("\n%s\n",parastart);

  1257                     printf(dquote_err);

  1258 		}

  1259                 else

  1260                     cnt_dquot++;

  1261             }

  1262         if (*squote_err)

  1263 	{

  1264             if (*t!=CHAR_SQUOTE && *t!=CHAR_OPEN_SQUOTE ||

  1265 	      pswit[QPARA_SWITCH] || squot)

  1266 	    {

  1267                 if (!pswit[OVERVIEW_SWITCH])

  1268 		{

  1269                     if (pswit[ECHO_SWITCH])

  1270 			printf("\n%s\n",parastart);

  1271                     printf(squote_err);

  1272 		}

  1273                 else

  1274                     cnt_squot++;

  1275 	    }

  1276             squot=0;

  1277 	}

  1278         if (*rbrack_err)

  1279 	{

  1280             if (!pswit[OVERVIEW_SWITCH])

  1281 	    {

  1282                 if (pswit[ECHO_SWITCH])

  1283 		    printf("\n%s\n",parastart);

  1284                 printf(rbrack_err);

  1285 	    }

  1286             else

  1287                 cnt_brack++;

  1288 	}

  1289         if (*sbrack_err)

  1290 	{

  1291             if (!pswit[OVERVIEW_SWITCH])

  1292 	    {

  1293                 if (pswit[ECHO_SWITCH])

  1294 		    printf("\n%s\n",parastart);

  1295                 printf(sbrack_err);

  1296 	    }

  1297             else

  1298                 cnt_brack++;

  1299 	}

  1300         if (*cbrack_err)

  1301 	{

  1302             if (!pswit[OVERVIEW_SWITCH])

  1303 	    {

  1304                 if (pswit[ECHO_SWITCH])

  1305 		    printf("\n%s\n",parastart);

  1306                 printf(cbrack_err);

  1307 	    }

  1308             else

  1309                 cnt_brack++;

  1310 	}

  1311         if (*unders_err)

  1312 	{

  1313             if (!pswit[OVERVIEW_SWITCH])

  1314 	    {

  1315                 if (pswit[ECHO_SWITCH])

  1316 		    printf("\n%s\n",parastart);

  1317                 printf(unders_err);

  1318 	    }

  1319             else

  1320                 cnt_brack++;

  1321 	}

  1322         *dquote_err=*squote_err=*rbrack_err=*cbrack_err=

  1323 	  *sbrack_err=*unders_err=0;

  1324 	isemptyline=analyse_quotes(aline,&counters);

  1325         if (isnewpara && !isemptyline)

  1326 	{

  1327 	    /* This line is the start of a new paragraph. */

  1328             start_para_line=linecnt;

  1329 	    /* Capture its first line in case we want to report it later. */

  1330             strncpy(parastart,aline,80);

  1331             parastart[79]=0;

  1332             dquotepar=squotepar=0; /* restart the quote count */

  1333             s=aline;

  1334             while (!gcisalpha(*s) && !gcisdigit(*s) && *s)

  1335 		s++;

  1336             if (*s>='a' && *s<='z')

  1337 	    {

  1338 		/* and its first letter is lowercase */

  1339                 if (pswit[ECHO_SWITCH])

  1340 		    printf("\n%s\n",aline);

  1341                 if (!pswit[OVERVIEW_SWITCH])

  1342                     printf("    Line %ld column %d - "

  1343 		      "Paragraph starts with lower-case\n",

  1344 		      linecnt,(int)(s-aline)+1);

  1345                 else

  1346                     cnt_punct++;

  1347 	    }

  1348             isnewpara=0; /* Signal the end of new para processing. */

  1349 	}

  1350         /* Check for an em-dash broken at line end. */

  1351         if (enddash && *aline=='-')

  1352 	{

  1353             if (pswit[ECHO_SWITCH])

  1354 		printf("\n%s\n",aline);

  1355             if (!pswit[OVERVIEW_SWITCH])

  1356                 printf("    Line %ld column 1 - Broken em-dash?\n",linecnt);

  1357             else

  1358                 cnt_punct++;

  1359 	}

  1360         enddash=0;

  1361         for (s=aline+strlen(aline)-1;*s==' ' && s>aline;s--)

  1362 	    ;

  1363         if (s>=aline && *s=='-')

  1364             enddash=1;

  1365 	/*

  1366          * Check for invalid or questionable characters in the line

  1367          * Anything above 127 is invalid for plain ASCII, and

  1368          * non-printable control characters should also be flagged.

  1369          * Tabs should generally not be there.

  1370 	 */

  1371         for (s=aline;*s;s++)

  1372 	{

  1373             i=(unsigned char)*s;

  1374             if (i<CHAR_SPACE && i!=CHAR_LF && i!=CHAR_CR && i!=CHAR_TAB)

  1375 	    {

  1376                 if (pswit[ECHO_SWITCH])

  1377 		    printf("\n%s\n",aline);

  1378                 if (!pswit[OVERVIEW_SWITCH])

  1379                     printf("    Line %ld column %d - Control character %d\n",

  1380 		      linecnt,(int)(s-aline)+1,i);

  1381                 else

  1382                     cnt_bin++;

  1383 	    }

  1384 	}

  1385         if (warnings->bin)

  1386 	    check_for_odd_characters(aline,warnings,isemptyline);

  1387         if (warnings->longline)

  1388 	    check_for_long_line(aline);

  1389         if (warnings->shortline)

  1390 	    check_for_short_line(aline,&last);

  1391         last.blen=last.len;

  1392         last.len=strlen(aline);

  1393         last.start=aline[0];

  1394         /* Look for punctuation other than full ellipses at start of line. */

  1395         if (*aline && strchr(".?!,;:",aline[0]) && strncmp(". . .",aline,5))

  1396 	{

  1397 	    if (pswit[ECHO_SWITCH])

  1398 		printf("\n%s\n",aline);

  1399 	    if (!pswit[OVERVIEW_SWITCH])

  1400 		printf("    Line %ld column 1 - Begins with punctuation?\n",

  1401 		  linecnt);

  1402 	    else

  1403 		cnt_punct++;

  1404 	}

  1405         /*

  1406 	 * Check for spaced em-dashes.

  1407          * We must check _all_ occurrences of "--" on the line

  1408          * hence the loop - even if the first double-dash is OK

  1409          * there may be another that's wrong later on.

  1410 	 */

  1411         if (warnings->dash)

  1412 	{

  1413             s=aline;

  1414             while (strstr(s,"--"))

  1415 	    {

  1416                 if (*(strstr(s,"--")-1)==CHAR_SPACE ||

  1417                    (*(strstr(s,"--")+2)==CHAR_SPACE))

  1418 		{

  1419                     if (pswit[ECHO_SWITCH])

  1420 			printf("\n%s\n",aline);

  1421                     if (!pswit[OVERVIEW_SWITCH])

  1422                         printf("    Line %ld column %d - Spaced em-dash?\n",

  1423 			  linecnt,(int)(strstr(s,"--")-aline)+1);

  1424                     else

  1425                         cnt_dash++;

  1426 		}

  1427                 s=strstr(s,"--")+2;

  1428 	    }

  1429 	}

  1430         /* Check for spaced dashes. */

  1431         if (warnings->dash)

  1432 	{

  1433             if (strstr(aline," -"))

  1434 	    {

  1435                 if (*(strstr(aline," -")+2)!='-')

  1436 		{

  1437                     if (pswit[ECHO_SWITCH])

  1438 			printf("\n%s\n",aline);

  1439                     if (!pswit[OVERVIEW_SWITCH])

  1440                         printf("    Line %ld column %d - Spaced dash?\n",

  1441 			  linecnt,(int)(strstr(aline," -")-aline)+1);

  1442                     else

  1443                         cnt_dash++;

  1444 		}

  1445 	    }

  1446             else if (strstr(aline,"- "))

  1447 	    {

  1448 		if (*(strstr(aline,"- ")-1)!='-')

  1449 		{

  1450 		    if (pswit[ECHO_SWITCH])

  1451 			printf("\n%s\n",aline);

  1452 		    if (!pswit[OVERVIEW_SWITCH])

  1453 			printf("    Line %ld column %d - Spaced dash?\n",

  1454 			  linecnt,(int)(strstr(aline,"- ")-aline)+1);

  1455 		    else

  1456 			cnt_dash++;

  1457 		}

  1458 	    }

  1459 	}

  1460         /*

  1461 	 * Check for unmarked paragraphs indicated by separate speakers.

  1462          * May well be false positive:

  1463          * "Bravo!" "Wonderful!" called the crowd.

  1464          * but useful all the same.

  1465 	 */

  1466         s=wrk;

  1467         *s=0;

  1468         if (strstr(aline,"\" \""))

  1469 	    s=strstr(aline,"\" \"");

  1470         if (strstr(aline,"\"  \""))

  1471 	    s=strstr(aline,"\"  \"");

  1472         if (*s)

  1473 	{

  1474             if (pswit[ECHO_SWITCH])

  1475 		printf("\n%s\n",aline);

  1476             if (!pswit[OVERVIEW_SWITCH])

  1477                 printf("    Line %ld column %d - "

  1478 		  "Query missing paragraph break?\n",

  1479 		  linecnt,(int)(s-aline)+1);

  1480             else

  1481                 cnt_punct++;

  1482 	}

  1483         /*

  1484 	 * Check for "to he" and other easy he/be errors.

  1485          * This is a very inadequate effort on the he/be problem,

  1486          * but the phrase "to he" is usually an error, whereas "to

  1487          * be" is quite common.

  1488          * Similarly, '"Quiet!", be said.' is a non-be error

  1489          * "to he" is _not_ always an error!:

  1490          *       "Where they went to he couldn't say."

  1491          * Another false positive:

  1492          *       What would "Cinderella" be without the . . .

  1493          * and another: "If he wants to he can see for himself."

  1494 	 */

  1495         s=wrk;

  1496         *s=0;

  1497         if (strstr(aline," to he "))

  1498 	    s=strstr(aline," to he ");

  1499         if (strstr(aline,"\" be "))

  1500 	    s=strstr(aline,"\" be ");

  1501         if (strstr(aline,"\", be "))

  1502 	    s=strstr(aline,"\", be ");

  1503         if (strstr(aline," is be "))

  1504 	    s=strstr(aline," is be ");

  1505         if (strstr(aline," be is "))

  1506 	    s=strstr(aline," be is ");

  1507         if (strstr(aline," was be "))

  1508 	    s=strstr(aline," was be ");

  1509         if (strstr(aline," be would "))

  1510 	    s=strstr(aline," be would ");

  1511         if (strstr(aline," be could "))

  1512 	    s=strstr(aline," be could ");

  1513         if (*s)

  1514 	{

  1515             if (pswit[ECHO_SWITCH])

  1516 		printf("\n%s\n",aline);

  1517             if (!pswit[OVERVIEW_SWITCH])

  1518                 printf("    Line %ld column %d - Query he/be error?\n",

  1519 		  linecnt,(int)(s-aline)+1);

  1520             else

  1521                 cnt_word++;

  1522 	}

  1523         s=wrk;

  1524         *s=0;

  1525         if (strstr(aline," i bad "))

  1526 	    s=strstr(aline," i bad ");

  1527         if (strstr(aline," you bad "))

  1528 	    s=strstr(aline," you bad ");

  1529         if (strstr(aline," he bad "))

  1530 	    s=strstr(aline," he bad ");

  1531         if (strstr(aline," she bad "))

  1532 	    s=strstr(aline," she bad ");

  1533         if (strstr(aline," they bad "))

  1534 	    s=strstr(aline," they bad ");

  1535         if (strstr(aline," a had "))

  1536 	    s=strstr(aline," a had ");

  1537         if (strstr(aline," the had "))

  1538 	    s=strstr(aline," the had ");

  1539         if (*s)

  1540 	{

  1541             if (pswit[ECHO_SWITCH])

  1542 		printf("\n%s\n",aline);

  1543             if (!pswit[OVERVIEW_SWITCH])

  1544                 printf("    Line %ld column %d - Query had/bad error?\n",

  1545 		  linecnt,(int)(s-aline)+1);

  1546             else

  1547                 cnt_word++;

  1548 	}

  1549         s=wrk;

  1550         *s=0;

  1551         if (strstr(aline,", hut "))

  1552 	    s=strstr(aline,", hut ");

  1553         if (strstr(aline,"; hut "))

  1554 	    s=strstr(aline,"; hut ");

  1555         if (*s)

  1556 	{

  1557             if (pswit[ECHO_SWITCH])

  1558 		printf("\n%s\n",aline);

  1559             if (!pswit[OVERVIEW_SWITCH])

  1560                 printf("    Line %ld column %d - Query hut/but error?\n",

  1561 		  linecnt,(int)(s-aline)+1);

  1562             else

  1563                 cnt_word++;

  1564 	}

  1565         /*

  1566 	 * Special case - angled bracket in front of "From" placed there by an

  1567 	 * MTA when sending an e-mail.

  1568 	 */

  1569         if (strstr(aline,">From"))

  1570 	{

  1571             if (pswit[ECHO_SWITCH])

  1572 		printf("\n%s\n",aline);

  1573             if (!pswit[OVERVIEW_SWITCH])

  1574                 printf("    Line %ld column %d - "

  1575 		  "Query angled bracket with From\n",

  1576 		  linecnt,(int)(strstr(aline,">From")-aline)+1);

  1577             else

  1578                 cnt_punct++;

  1579 	}

  1580         /*

  1581 	 * Check for a single character line -

  1582 	 * often an overflow from bad wrapping.

  1583 	 */

  1584         if (*aline && !aline[1])

  1585 	{

  1586             if (*aline=='I' || *aline=='V' || *aline=='X' || *aline=='L' ||

  1587 	      gcisdigit(*aline))

  1588                 ; /* Nothing - ignore numerals alone on a line. */

  1589             else

  1590 	    {

  1591                 if (pswit[ECHO_SWITCH])

  1592 		    printf("\n%s\n",aline);

  1593                 if (!pswit[OVERVIEW_SWITCH])

  1594                     printf("    Line %ld column 1 - "

  1595 		      "Query single character line\n",linecnt);

  1596                 else

  1597                     cnt_punct++;

  1598 	    }

  1599 	}

  1600         /* Check for I" - often should be ! */

  1601         if (strstr(aline," I\""))

  1602 	{

  1603             if (pswit[ECHO_SWITCH])

  1604 		printf("\n%s\n",aline);

  1605             if (!pswit[OVERVIEW_SWITCH])

  1606                 printf("    Line %ld column %ld - Query I=exclamation mark?\n",

  1607 		  linecnt,strstr(aline," I\"")-aline);

  1608             else

  1609                 cnt_punct++;

  1610 	}

  1611         /*

  1612 	 * Check for period without a capital letter. Cut-down from gutspell.

  1613          * Only works when it happens on a single line.

  1614 	 */

  1615         if (pswit[PARANOID_SWITCH])

  1616 	{

  1617             for (t=s=aline;strstr(t,". ");)

  1618 	    {

  1619                 t=strstr(t,". ");

  1620                 if (t==s)

  1621 		{

  1622                     t++;

  1623 		    /* start of line punctuation is handled elsewhere */

  1624                     continue;

  1625 		}

  1626                 if (!gcisalpha(t[-1]))

  1627 		{

  1628                     t++;

  1629                     continue;

  1630 		}

  1631                 if (warnings->isDutch)

  1632 		{

  1633 		    /* For Frank & Jeroen -- 's Middags case */

  1634                     if (t[2]==CHAR_SQUOTE && t[3]>='a' && t[3]<='z' &&

  1635 		      t[4]==CHAR_SPACE && t[5]>='A' && t[5]<='Z')

  1636 		    {

  1637                         t++;

  1638                         continue;

  1639 		    }

  1640 		}

  1641                 s1=t+2;

  1642                 while (*s1 && !gcisalpha(*s1) && !isdigit(*s1))

  1643                     s1++;

  1644                 if (*s1>='a' && *s1<='z')

  1645 		{

  1646 		    /* we have something to investigate */

  1647                     istypo=1;

  1648 		    /* so let's go back and find out */

  1649                     for (s1=t-1;s1>=s &&

  1650 		      (gcisalpha(*s1) || gcisdigit(*s1) || *s1==CHAR_SQUOTE &&

  1651 		      gcisalpha(s1[1]) && gcisalpha(s1[-1]));s1--)

  1652 			;

  1653                     s1++;

  1654                     for (i=0;*s1 && *s1!='.';s1++,i++)

  1655                         testword[i]=*s1;

  1656                     testword[i]=0;

  1657                     for (i=0;*abbrev[i];i++)

  1658                         if (!strcmp(testword,abbrev[i]))

  1659                             istypo=0;

  1660                     if (gcisdigit(*testword))

  1661 			istypo=0;

  1662                     if (!testword[1])

  1663 			istypo=0;

  1664                     if (isroman(testword))

  1665 			istypo=0;

  1666                     if (istypo)

  1667 		    {

  1668                         istypo=0;

  1669                         for (i=0;testword[i];i++)

  1670                             if (strchr(vowels,testword[i]))

  1671                                 istypo=1;

  1672 		    }

  1673                     if (istypo)

  1674 		    {

  1675                         isdup=0;

  1676                         if (strlen(testword)<MAX_QWORD_LENGTH &&

  1677 			  !pswit[VERBOSE_SWITCH])

  1678                             for (i=0;i<qperiod_index;i++)

  1679                                 if (!strcmp(testword,qperiod[i]))

  1680                                     isdup=1;

  1681                         if (!isdup)

  1682 			{

  1683                             if (qperiod_index<MAX_QWORD &&

  1684 			      strlen(testword)<MAX_QWORD_LENGTH)

  1685 			    {

  1686                                 strcpy(qperiod[qperiod_index],testword);

  1687                                 qperiod_index++;

  1688 			    }

  1689                             if (pswit[ECHO_SWITCH])

  1690 				printf("\n%s\n",aline);

  1691                             if (!pswit[OVERVIEW_SWITCH])

  1692                                 printf("    Line %ld column %d - "

  1693 				  "Extra period?\n",linecnt,(int)(t-aline)+1);

  1694                             else

  1695                                 cnt_punct++;

  1696 			}

  1697 		    }

  1698 		}

  1699 	    t++;

  1700 	    }

  1701 	}

  1702         if (pswit[TYPO_SWITCH])

  1703 	{

  1704             /* Check for words usually not followed by punctuation. */

  1705             for (s=aline;*s;)

  1706 	    {

  1707                 wordstart=s;

  1708                 s=getaword(s,inword);

  1709                 if (!*inword)

  1710 		    continue;

  1711                 lowerit(inword);

  1712                 for (i=0;*nocomma[i];i++)

  1713                     if (!strcmp(inword,nocomma[i]))

  1714 		    {

  1715                         if (*s==',' || *s==';' || *s==':')

  1716 			{

  1717                             if (pswit[ECHO_SWITCH])

  1718 				printf("\n%s\n",aline);

  1719                             if (!pswit[OVERVIEW_SWITCH])

  1720                                 printf("    Line %ld column %d - "

  1721 				  "Query punctuation after %s?\n",

  1722 				  linecnt,(int)(s-aline)+1,inword);

  1723                             else

  1724                                 cnt_punct++;

  1725 			}

  1726 		    }

  1727 		for (i=0;*noperiod[i];i++)

  1728                     if (!strcmp(inword,noperiod[i]))

  1729 		    {

  1730                         if (*s=='.' || *s=='!')

  1731 			{

  1732                             if (pswit[ECHO_SWITCH])

  1733 				printf("\n%s\n",aline);

  1734                             if (!pswit[OVERVIEW_SWITCH])

  1735                                 printf("    Line %ld column %d - "

  1736 				  "Query punctuation after %s?\n",

  1737 				  linecnt,(int)(s-aline)+1,inword);

  1738                             else

  1739                                 cnt_punct++;

  1740 			}

  1741 		    }

  1742 	    }

  1743 	}

  1744         /*

  1745 	 * Check for commonly mistyped words,

  1746 	 * and digits like 0 for O in a word.

  1747 	 */

  1748         for (s=aline;*s;)

  1749 	{

  1750             wordstart=s;

  1751             s=getaword(s,inword);

  1752             if (!*inword)

  1753 		continue; /* don't bother with empty lines */

  1754             if (mixdigit(inword))

  1755 	    {

  1756                 if (pswit[ECHO_SWITCH])

  1757 		    printf("\n%s\n",aline);

  1758                 if (!pswit[OVERVIEW_SWITCH])

  1759                     printf("    Line %ld column %d - Query digit in %s\n",

  1760 		      linecnt,(int)(wordstart-aline)+1,inword);

  1761                 else

  1762                     cnt_word++;

  1763 	    }

  1764             /*

  1765 	     * Put the word through a series of tests for likely typos and OCR

  1766 	     * errors.

  1767 	     */

  1768             if (pswit[TYPO_SWITCH])

  1769 	    {

  1770                 istypo=0;

  1771                 strcpy(testword,inword);

  1772                 alower=0;

  1773                 for (i=0;i<(signed int)strlen(testword);i++)

  1774 		{

  1775 		    /* lowercase for testing */

  1776                     if (testword[i]>='a' && testword[i]<='z')

  1777 			alower=1;

  1778                     if (alower && testword[i]>='A' && testword[i]<='Z')

  1779 		    {

  1780                         /*

  1781 			 * We have an uppercase mid-word. However, there are

  1782 			 * common cases:

  1783                          *   Mac and Mc like McGill

  1784                          *   French contractions like l'Abbe

  1785 			 */

  1786                         if (i==2 && testword[0]=='m' && testword[1]=='c' ||

  1787                           i==3 && testword[0]=='m' && testword[1]=='a' &&

  1788 			  testword[2]=='c' || i>0 && testword[i-1]==CHAR_SQUOTE)

  1789 			    ; /* do nothing! */

  1790                         else

  1791                             istypo=1;

  1792 		    }

  1793                     testword[i]=(char)tolower(testword[i]);

  1794 		}

  1795                 /*

  1796 		 * Check for certain unlikely two-letter combinations at word

  1797 		 * start and end.

  1798 		 */

  1799                 if (strlen(testword)>1)

  1800 		{

  1801                     for (i=0;*nostart[i];i++)

  1802                         if (!strncmp(testword,nostart[i],2))

  1803                             istypo=1;

  1804                     for (i=0;*noend[i];i++)

  1805                         if (!strncmp(testword+strlen(testword)-2,noend[i],2))

  1806                             istypo=1;

  1807 		}

  1808                 /* ght is common, gbt never. Like that. */

  1809                 if (strstr(testword,"cb"))

  1810 		    istypo=1;

  1811                 if (strstr(testword,"gbt"))

  1812 		    istypo=1;

  1813                 if (strstr(testword,"pbt"))

  1814 		    istypo=1;

  1815                 if (strstr(testword,"tbs"))

  1816 		    istypo=1;

  1817                 if (strstr(testword,"mrn"))

  1818 		    istypo=1;

  1819                 if (strstr(testword,"ahle"))

  1820 		    istypo=1;

  1821                 if (strstr(testword,"ihle"))

  1822 		    istypo=1;

  1823                 /*

  1824 		 * "TBE" does happen - like HEARTBEAT - but uncommon.

  1825                  * Also "TBI" - frostbite, outbid - but uncommon.

  1826                  * Similarly "ii" like Hawaii, or Pompeii, and in Roman

  1827 		 * numerals, but "ii" is a common scanno.

  1828 		 */

  1829                 if (strstr(testword,"tbi"))

  1830 		    istypo=1;

  1831                 if (strstr(testword,"tbe"))

  1832 		    istypo=1;

  1833                 if (strstr(testword,"ii"))

  1834 		    istypo=1;

  1835                 /*

  1836 		 * Check for no vowels or no consonants.

  1837                  * If none, flag a typo.

  1838 		 */

  1839                 if (!istypo && strlen(testword)>1)

  1840 		{

  1841                     vowel=consonant=0;

  1842                     for (i=0;testword[i];i++)

  1843 		    {

  1844                         if (testword[i]=='y' || gcisdigit(testword[i]))

  1845 			{

  1846 			    /* Yah, this is loose. */

  1847                             vowel++;

  1848                             consonant++;

  1849 			}

  1850                         else if (strchr(vowels,testword[i]))

  1851 			    vowel++;

  1852 			else

  1853 			    consonant++;

  1854 		    }

  1855                     if (!vowel || !consonant)

  1856                         istypo=1;

  1857 		}

  1858                 /*

  1859 		 * Now exclude the word from being reported if it's in

  1860                  * the okword list.

  1861 		 */

  1862                 for (i=0;*okword[i];i++)

  1863                     if (!strcmp(testword,okword[i]))

  1864                         istypo=0;

  1865                 /*

  1866 		 * What looks like a typo may be a Roman numeral.

  1867 		 * Exclude these.

  1868 		 */

  1869                 if (istypo && isroman(testword))

  1870 		    istypo=0;

  1871                 /* Check the manual list of typos. */

  1872                 if (!istypo)

  1873                     for (i=0;*typo[i];i++)

  1874                         if (!strcmp(testword,typo[i]))

  1875                             istypo=1;

  1876                 /*

  1877 		 * Check lowercase s, l, i and m - special cases.

  1878                  *   "j" - often a semi-colon gone wrong.

  1879                  *   "d" for a missing apostrophe - he d

  1880                  *   "n" for "in"

  1881 		 */

  1882                 if (!istypo && strlen(testword)==1 && strchr("slmijdn",*inword))

  1883 		    istypo=1;

  1884                 if (istypo)

  1885 		{

  1886                     isdup=0;

  1887                     if (strlen(testword)<MAX_QWORD_LENGTH &&

  1888 		      !pswit[VERBOSE_SWITCH])

  1889                         for (i=0;i<qword_index;i++)

  1890                             if (!strcmp(testword,qword[i]))

  1891 			    {

  1892                                 isdup=1;

  1893                                 ++dupcnt[i];

  1894 			    }

  1895                     if (!isdup)

  1896 		    {

  1897                         if (qword_index<MAX_QWORD &&

  1898 			  strlen(testword)<MAX_QWORD_LENGTH)

  1899 			{

  1900                             strcpy(qword[qword_index],testword);

  1901                             qword_index++;

  1902 			}

  1903                         if (pswit[ECHO_SWITCH])

  1904 			    printf("\n%s\n",aline);

  1905                         if (!pswit[OVERVIEW_SWITCH])

  1906 			{

  1907                             printf("    Line %ld column %d - Query word %s",

  1908 			      linecnt,(int)(wordstart-aline)+1,inword);

  1909                             if (strlen(testword)<MAX_QWORD_LENGTH &&

  1910 			      !pswit[VERBOSE_SWITCH])

  1911                                 printf(" - not reporting duplicates");

  1912                             printf("\n");

  1913 			}

  1914                         else

  1915                             cnt_word++;

  1916 		    }

  1917 		}

  1918 	    }

  1919 	    /* check the user's list of typos */

  1920 	    if (!istypo && usertypo_count)

  1921 		for (i=0;i<usertypo_count;i++)

  1922 		    if (!strcmp(testword,usertypo[i]))

  1923 		    {

  1924 			if (pswit[ECHO_SWITCH])

  1925 			    printf("\n%s\n",aline);

  1926 			if (!pswit[OVERVIEW_SWITCH])

  1927 			    printf("    Line %ld column %d - "

  1928 			      "Query possible scanno %s\n",

  1929 			      linecnt,(int)(wordstart-aline)+2,inword);

  1930 		    }

  1931             if (pswit[PARANOID_SWITCH] && warnings->digit)

  1932 	    {

  1933 		/* In paranoid mode, query all 0 and 1 standing alone. */

  1934                 if (!strcmp(inword,"0") || !strcmp(inword,"1"))

  1935 		{

  1936                     if (pswit[ECHO_SWITCH])

  1937 			printf("\n%s\n",aline);

  1938                     if (!pswit[OVERVIEW_SWITCH])

  1939                         printf("    Line %ld column %d - Query standalone %s\n",

  1940 			  linecnt,(int)(wordstart-aline)+2,inword);

  1941                     else

  1942                         cnt_word++;

  1943 		}

  1944 	    }

  1945 	}

  1946 	/*

  1947          * Look for added or missing spaces around punctuation and quotes.

  1948          * If there is a punctuation character like ! with no space on

  1949          * either side, suspect a missing!space. If there are spaces on

  1950          * both sides , assume a typo. If we see a double quote with no

  1951          * space or punctuation on either side of it, assume unspaced

  1952          * quotes "like"this.

  1953 	 */

  1954         llen=strlen(aline);

  1955         for (i=1;i<llen;i++)

  1956 	{

  1957 	    /* For each character in the line after the first. */

  1958             if (strchr(".?!,;:_",aline[i]))  /* if it's punctuation */

  1959 	    {

  1960 		/* we need to suppress warnings for acronyms like M.D. */

  1961                 isacro=0;

  1962 		/* we need to suppress warnings for ellipsis . . . */

  1963                 isellipsis=0;

  1964 		/* if there are letters on both sides of it or ... */

  1965                 if (gcisalpha(aline[i-1]) && gcisalpha(aline[i+1]) ||

  1966                    gcisalpha(aline[i+1]) && strchr("?!,;:",aline[i]))

  1967 		{

  1968 		    /* ...if it's strict punctuation followed by an alpha */

  1969                     if (aline[i]=='.')

  1970 		    {

  1971                         if (i>2 && aline[i-2]=='.')

  1972 			    isacro=1;

  1973                         if (i+2<llen && aline[i+2]=='.')

  1974 			    isacro=1;

  1975 		    }

  1976                     if (!isacro)

  1977 		    {

  1978                         if (pswit[ECHO_SWITCH])

  1979 			    printf("\n%s\n",aline);

  1980                         if (!pswit[OVERVIEW_SWITCH])

  1981                             printf("    Line %ld column %d - Missing space?\n",

  1982 			      linecnt,i+1);

  1983                         else

  1984                             cnt_punct++;

  1985 		    }

  1986 		}

  1987                 if (aline[i-1]==CHAR_SPACE &&

  1988 		  (aline[i+1]==CHAR_SPACE || aline[i+1]==0))

  1989 		{

  1990 		    /*

  1991 		     * If there are spaces on both sides,

  1992 		     * or space before and end of line.

  1993 		     */

  1994                     if (aline[i]=='.')

  1995 		    {

  1996                         if (i>2 && aline[i-2]=='.')

  1997 			    isellipsis=1;

  1998                         if (i+2<llen && aline[i+2]=='.')

  1999 			    isellipsis=1;

  2000 		    }

  2001                     if (!isemptyline && !isellipsis)

  2002 		    {

  2003                         if (pswit[ECHO_SWITCH])

  2004 			    printf("\n%s\n",aline);

  2005                         if (!pswit[OVERVIEW_SWITCH])

  2006                             printf("    Line %ld column %d - "

  2007 			      "Spaced punctuation?\n",linecnt,i+1);

  2008                         else

  2009                             cnt_punct++;

  2010 		    }

  2011 		}

  2012 	    }

  2013 	}

  2014         /* Split out the characters that CANNOT be preceded by space. */

  2015         llen=strlen(aline);

  2016         for (i=1;i<llen;i++)

  2017 	{

  2018 	    /* for each character in the line after the first */

  2019             if (strchr("?!,;:",aline[i]))

  2020 	    {

  2021 		/* if it's punctuation that _cannot_ have a space before it */

  2022                 if (aline[i-1]==CHAR_SPACE && !isemptyline &&

  2023 		  aline[i+1]!=CHAR_SPACE)

  2024 		{

  2025 		    /*

  2026 		     * If aline[i+1] DOES == space,

  2027 		     * it was already reported just above.

  2028 		     */

  2029                     if (pswit[ECHO_SWITCH])

  2030 			printf("\n%s\n",aline);

  2031                     if (!pswit[OVERVIEW_SWITCH])

  2032                         printf("    Line %ld column %d - Spaced punctuation?\n",

  2033 			  linecnt,i+1);

  2034                     else

  2035                         cnt_punct++;

  2036 		}

  2037 	    }

  2038 	}

  2039         /*

  2040 	 * Special case " .X" where X is any alpha.

  2041          * This plugs a hole in the acronym code above.

  2042 	 * Inelegant, but maintainable.

  2043 	 */

  2044         llen=strlen(aline);

  2045         for (i=1;i<llen;i++)

  2046 	{

  2047 	    /* for each character in the line after the first */

  2048             if (aline[i]=='.')

  2049 	    {

  2050 		/* if it's a period */

  2051                 if (aline[i-1]==CHAR_SPACE && gcisalpha(aline[i+1]))

  2052 		{

  2053 		    /*

  2054 		     * If the period follows a space and

  2055 		     * is followed by a letter.

  2056 		     */

  2057                     if (pswit[ECHO_SWITCH])

  2058 			printf("\n%s\n",aline);

  2059                     if (!pswit[OVERVIEW_SWITCH])

  2060                         printf("    Line %ld column %d - Spaced punctuation?\n",

  2061 			  linecnt,i+1);

  2062                     else

  2063                         cnt_punct++;

  2064 		}

  2065 	    }

  2066 	}

  2067         for (i=1;i<llen;i++)

  2068 	{

  2069 	    /* for each character in the line after the first */

  2070             if (aline[i]==CHAR_DQUOTE)

  2071 	    {

  2072                 if (!strchr(" _-.'`,;:!/([{?}])",aline[i-1]) &&

  2073 		  !strchr(" _-.'`,;:!/([{?}])",aline[i+1]) && aline[i+1] ||

  2074 		  !strchr(" _-([{'`",aline[i-1]) && gcisalpha(aline[i+1]))

  2075 		{

  2076 		    if (pswit[ECHO_SWITCH])

  2077 			printf("\n%s\n",aline);

  2078 		    if (!pswit[OVERVIEW_SWITCH])

  2079 			printf("    Line %ld column %d - Unspaced quotes?\n",

  2080 			  linecnt,i+1);

  2081 		    else

  2082 			cnt_punct++;

  2083 		}

  2084 	    }

  2085 	}

  2086         /* Check parity of quotes. */

  2087         for (s=aline;*s;s++)

  2088 	{

  2089             if (*s==CHAR_DQUOTE)

  2090 	    {

  2091                 if (!(dquotepar=!dquotepar))

  2092 		{

  2093 		    /* parity even */

  2094                     if (!strchr("_-.'`/,;:!?)]} ",s[1]))

  2095 		    {

  2096                         if (pswit[ECHO_SWITCH])

  2097 			    printf("\n%s\n",aline);

  2098                         if (!pswit[OVERVIEW_SWITCH])

  2099                             printf("    Line %ld column %d - "

  2100 			      "Wrongspaced quotes?\n",linecnt,(int)(s-aline)+1);

  2101                         else

  2102                             cnt_punct++;

  2103 		    }

  2104 		}

  2105                 else

  2106 		{

  2107 		    /* parity odd */

  2108                     if (!gcisalpha(s[1]) && !isdigit(s[1]) &&

  2109 		      !strchr("_-/.'`([{$",s[1]) || !s[1])

  2110 		    {

  2111                         if (pswit[ECHO_SWITCH])

  2112 			    printf("\n%s\n",aline);

  2113                         if (!pswit[OVERVIEW_SWITCH])

  2114                             printf("    Line %ld column %d - "

  2115 			      "Wrongspaced quotes?\n",linecnt,(int)(s-aline)+1);

  2116                         else

  2117                             cnt_punct++;

  2118 		    }

  2119 		}

  2120 	    }

  2121 	}

  2122 	if (*aline==CHAR_DQUOTE)

  2123 	{

  2124 	    if (strchr(",;:!?)]} ",aline[1]))

  2125 	    {

  2126 		if (pswit[ECHO_SWITCH])

  2127 		    printf("\n%s\n",aline);

  2128 		if (!pswit[OVERVIEW_SWITCH])

  2129 		    printf("    Line %ld column 1 - Wrongspaced quotes?\n",

  2130 		      linecnt);

  2131 		else

  2132 		    cnt_punct++;

  2133 	    }

  2134 	}

  2135         if (pswit[SQUOTE_SWITCH])

  2136 	{

  2137             for (s=aline;*s;s++)

  2138 	    {

  2139                 if ((*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE) &&

  2140 		  (s==aline || s>aline && !gcisalpha(s[-1]) ||

  2141 		  !gcisalpha(s[1])))

  2142 		{

  2143                     if (!(squotepar=!squotepar))

  2144 		    {

  2145 			/* parity even */

  2146                         if (!strchr("_-.'`/\",;:!?)]} ",s[1]))

  2147 			{

  2148                             if (pswit[ECHO_SWITCH])

  2149 				printf("\n%s\n",aline);

  2150                             if (!pswit[OVERVIEW_SWITCH])

  2151                                 printf("    Line %ld column %d - "

  2152 				  "Wrongspaced singlequotes?\n",

  2153 				  linecnt,(int)(s-aline)+1);

  2154                             else

  2155                                 cnt_punct++;

  2156 			}

  2157 		    }

  2158                     else

  2159 		    {

  2160 			/* parity odd */

  2161                         if (!gcisalpha(s[1]) && !isdigit(s[1]) &&

  2162 			  !strchr("_-/\".'`",s[1]) || !s[1])

  2163 			{

  2164                             if (pswit[ECHO_SWITCH])

  2165 				printf("\n%s\n",aline);

  2166                             if (!pswit[OVERVIEW_SWITCH])

  2167                                 printf("    Line %ld column %d - "

  2168 				  "Wrongspaced singlequotes?\n",

  2169 				  linecnt,(int)(s-aline)+1);

  2170                             else

  2171                                 cnt_punct++;

  2172 			}

  2173 		    }

  2174 		}

  2175 	    }

  2176 	}

  2177         /*

  2178 	 * Look for double punctuation like ,. or ,,

  2179          * Thanks to DW for the suggestion!

  2180          * In books with references, ".," and ".;" are common

  2181          * e.g. "etc., etc.," and vol. 1.; vol 3.;

  2182          * OTOH, from my initial tests, there are also fairly

  2183          * common errors. What to do? Make these cases paranoid?

  2184          * ".," is the most common, so warnings->dotcomma is used

  2185          * to suppress detailed reporting if it occurs often.

  2186 	 */

  2187         llen=strlen(aline);

  2188         for (i=0;i<llen;i++)

  2189 	{

  2190 	    /* for each punctuation character in the line */

  2191             if (strchr(".?!,;:",aline[i]) && (strchr(".?!,;:",aline[i+1])) &&

  2192 	      aline[i] && aline[i+1])

  2193 	    {

  2194 		/* followed by punctuation, it's a query, unless . . . */

  2195                 if (aline[i]==aline[i+1] && (aline[i]=='.' || aline[i]=='?' ||

  2196 		  aline[i]=='!') ||

  2197 		  !warnings->dotcomma && aline[i]=='.' && aline[i+1]==',' ||

  2198 		  warnings->isFrench && !strncmp(aline+i,",...",4) ||

  2199 		  warnings->isFrench && !strncmp(aline+i,"...,",4) ||

  2200 		  warnings->isFrench && !strncmp(aline+i,";...",4) ||

  2201 		  warnings->isFrench && !strncmp(aline+i,"...;",4) ||

  2202 		  warnings->isFrench && !strncmp(aline+i,":...",4) ||

  2203 		  warnings->isFrench && !strncmp(aline+i,"...:",4) ||

  2204 		  warnings->isFrench && !strncmp(aline+i,"!...",4) ||

  2205 		  warnings->isFrench && !strncmp(aline+i,"...!",4) ||

  2206 		  warnings->isFrench && !strncmp(aline+i,"?...",4) ||

  2207 		  warnings->isFrench && !strncmp(aline+i,"...?",4))

  2208 		{

  2209 		    if (warnings->isFrench && !strncmp(aline+i,",...",4) ||

  2210 		      warnings->isFrench && !strncmp(aline+i,"...,",4) ||

  2211 		      warnings->isFrench && !strncmp(aline+i,";...",4) ||

  2212 		      warnings->isFrench && !strncmp(aline+i,"...;",4) ||

  2213 		      warnings->isFrench && !strncmp(aline+i,":...",4) ||

  2214 		      warnings->isFrench && !strncmp(aline+i,"...:",4) ||

  2215 		      warnings->isFrench && !strncmp(aline+i,"!...",4) ||

  2216 		      warnings->isFrench && !strncmp(aline+i,"...!",4) ||

  2217 		      warnings->isFrench && !strncmp(aline+i,"?...",4) ||

  2218 		      warnings->isFrench && !strncmp(aline+i,"...?",4))

  2219 			i+=4;

  2220 		    ; /* do nothing for .. !! and ?? which can be legit */

  2221 		}

  2222                 else

  2223 		{

  2224                     if (pswit[ECHO_SWITCH])

  2225 			printf("\n%s\n",aline);

  2226                     if (!pswit[OVERVIEW_SWITCH])

  2227                         printf("    Line %ld column %d - Double punctuation?\n",

  2228 			  linecnt,i+1);

  2229                     else

  2230                         cnt_punct++;

  2231 		}

  2232 	    }

  2233 	}

  2234         s=aline;

  2235         while (strstr(s," \" "))

  2236 	{

  2237             if (pswit[ECHO_SWITCH])

  2238 		printf("\n%s\n",aline);

  2239             if (!pswit[OVERVIEW_SWITCH])

  2240                 printf("    Line %ld column %d - Spaced doublequote?\n",

  2241 		  linecnt,(int)(strstr(s," \" ")-aline+1));

  2242             else

  2243                 cnt_punct++;

  2244             s=strstr(s," \" ")+2;

  2245 	}

  2246         s=aline;

  2247         while (strstr(s," ' "))

  2248 	{

  2249             if (pswit[ECHO_SWITCH])

  2250 		printf("\n%s\n",aline);

  2251             if (!pswit[OVERVIEW_SWITCH])

  2252                 printf("    Line %ld column %d - Spaced singlequote?\n",

  2253 		  linecnt,(int)(strstr(s," ' ")-aline+1));

  2254             else

  2255                 cnt_punct++;

  2256             s=strstr(s," ' ")+2;

  2257 	}

  2258         s=aline;

  2259         while (strstr(s," ` "))

  2260 	{

  2261             if (pswit[ECHO_SWITCH])

  2262 		printf("\n%s\n",aline);

  2263             if (!pswit[OVERVIEW_SWITCH])

  2264                 printf("    Line %ld column %d - Spaced singlequote?\n",

  2265 		  linecnt,(int)(strstr(s," ` ")-aline+1));

  2266             else

  2267                 cnt_punct++;

  2268             s=strstr(s," ` ")+2;

  2269 	}

  2270         /* check special case of 'S instead of 's at end of word */

  2271         s=aline+1;

  2272         while (*s)

  2273 	{

  2274             if (*s==CHAR_SQUOTE && s[1]=='S' && s[-1]>='a' && s[-1]<='z')

  2275 	    {

  2276                 if (pswit[ECHO_SWITCH])

  2277 		    printf("\n%s\n",aline);

  2278                 if (!pswit[OVERVIEW_SWITCH])

  2279                     printf("    Line %ld column %d - Capital \"S\"?\n",

  2280 		      linecnt,(int)(s-aline+2));

  2281                 else

  2282                     cnt_punct++;

  2283 	    }

  2284             s++;

  2285 	}

  2286         /*

  2287 	 * Now check special cases - start and end of line -

  2288          * for single and double quotes. Start is sometimes [sic]

  2289          * but better to query it anyway.

  2290          * While we're here, check for dash at end of line.

  2291 	 */

  2292         llen=strlen(aline);

  2293         if (llen>1)

  2294 	{

  2295             if (aline[llen-1]==CHAR_DQUOTE || aline[llen-1]==CHAR_SQUOTE ||

  2296 	      aline[llen-1]==CHAR_OPEN_SQUOTE)

  2297                 if (aline[llen-2]==CHAR_SPACE)

  2298 		{

  2299                     if (pswit[ECHO_SWITCH])

  2300 			printf("\n%s\n",aline);

  2301                     if (!pswit[OVERVIEW_SWITCH])

  2302                         printf("    Line %ld column %d - Spaced quote?\n",

  2303 			  linecnt,llen);

  2304                     else

  2305                         cnt_punct++;

  2306 		}

  2307             if ((aline[0]==CHAR_SQUOTE || aline[0]==CHAR_OPEN_SQUOTE) &&

  2308 	      aline[1]==CHAR_SPACE)

  2309 	    {

  2310 		if (pswit[ECHO_SWITCH])

  2311 		    printf("\n%s\n",aline);

  2312 		if (!pswit[OVERVIEW_SWITCH])

  2313 		    printf("    Line %ld column 1 - Spaced quote?\n",linecnt);

  2314 		else

  2315 		    cnt_punct++;

  2316 	    }

  2317             /*

  2318 	     * Dash at end of line may well be legit - paranoid mode only

  2319              * and don't report em-dash at line-end.

  2320 	     */

  2321             if (pswit[PARANOID_SWITCH] && warnings->hyphen)

  2322 	    {

  2323                 for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)

  2324 		    ;

  2325                 if (aline[i]=='-' && aline[i-1]!='-')

  2326 		{

  2327                     if (pswit[ECHO_SWITCH])

  2328 			printf("\n%s\n",aline);

  2329                     if (!pswit[OVERVIEW_SWITCH])

  2330                         printf("    Line %ld column %d - "

  2331 			  "Hyphen at end of line?\n",linecnt,i);

  2332 		}

  2333 	    }

  2334 	}

  2335         /*

  2336 	 * Brackets are often unspaced, but shouldn't be surrounded by alpha.

  2337          * If so, suspect a scanno like "a]most".

  2338 	 */

  2339         llen=strlen(aline);

  2340         for (i=1;i<llen-1;i++)

  2341 	{

  2342 	    /* for each bracket character in the line except 1st & last */

  2343             if (strchr("{[()]}",aline[i]) && gcisalpha(aline[i-1]) &&

  2344 	      gcisalpha(aline[i+1]))

  2345 	    {

  2346                 if (pswit[ECHO_SWITCH])

  2347 		    printf("\n%s\n",aline);

  2348                 if (!pswit[OVERVIEW_SWITCH])

  2349                     printf("    Line %ld column %d - Unspaced bracket?\n",

  2350 		      linecnt,i);

  2351                 else

  2352                     cnt_punct++;

  2353 	    }

  2354 	}

  2355         llen=strlen(aline);

  2356         if (warnings->endquote)

  2357 	{

  2358             for (i=1;i<llen;i++)

  2359 	    {

  2360 		/* for each character in the line except 1st */

  2361                 if (aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))

  2362 		{

  2363 		    if (pswit[ECHO_SWITCH])

  2364 			printf("\n%s\n",aline);

  2365 		    if (!pswit[OVERVIEW_SWITCH])

  2366 			printf("    Line %ld column %d - "

  2367 			  "endquote missing punctuation?\n",linecnt,i);

  2368 		    else

  2369 			cnt_punct++;

  2370 		}

  2371 	    }

  2372 	}

  2373 	/*

  2374          * Check for <HTML TAG>.

  2375          * If there is a < in the line, followed at some point

  2376          * by a > then we suspect HTML.

  2377 	 */

  2378         if (strstr(aline,"<") && strstr(aline,">"))

  2379 	{

  2380             i=(signed int)(strstr(aline,">")-strstr(aline,"<")+1);

  2381             if (i>0)

  2382 	    {

  2383                 strncpy(wrk,strstr(aline,"<"),i);

  2384                 wrk[i]=0;

  2385                 if (pswit[ECHO_SWITCH])

  2386 		    printf("\n%s\n",aline);

  2387                 if (!pswit[OVERVIEW_SWITCH])

  2388                     printf("    Line %ld column %d - HTML Tag? %s \n",

  2389 		      linecnt,(int)(strstr(aline,"<")-aline)+1,wrk);

  2390                 else

  2391                     cnt_html++;

  2392 	    }

  2393 	}

  2394         /*

  2395 	 * Check for &symbol; HTML.

  2396          * If there is a & in the line, followed at

  2397          * some point by a ; then we suspect HTML.

  2398 	 */

  2399         if (strstr(aline,"&") && strstr(aline,";"))

  2400 	{

  2401             i=(int)(strstr(aline,";")-strstr(aline,"&")+1);

  2402             for (s=strstr(aline,"&");s<strstr(aline,";");s++)

  2403                 if (*s==CHAR_SPACE)

  2404 		    i=0;                /* Don't report "Jones & Son;" */

  2405             if (i>0)

  2406 	    {

  2407                 strncpy(wrk,strstr(aline,"&"),i);

  2408                 wrk[i]=0;

  2409                 if (pswit[ECHO_SWITCH])

  2410 		    printf("\n%s\n",aline);

  2411                 if (!pswit[OVERVIEW_SWITCH])

  2412                     printf("    Line %ld column %d - HTML symbol? %s \n",

  2413 		      linecnt,(int)(strstr(aline,"&")-aline)+1,wrk);

  2414                 else

  2415                     cnt_html++;

  2416 	    }

  2417 	}

  2418         /*

  2419 	 * At end of paragraph, check for mismatched quotes.

  2420          * We don't want to report an error immediately, since it is a

  2421          * common convention to omit the quotes at end of paragraph if

  2422          * the next paragraph is a continuation of the same speaker.

  2423          * Where this is the case, the next para should begin with a

  2424          * quote, so we store the warning message and only display it

  2425          * at the top of the next iteration if the new para doesn't

  2426          * start with a quote.

  2427          * The -p switch overrides this default, and warns of unclosed

  2428          * quotes on _every_ paragraph, whether the next begins with a

  2429          * quote or not.

  2430 	 */

  2431         if (isemptyline)

  2432 	{

  2433 	    /* end of para - add up the totals */

  2434             if (counters.quot%2)

  2435                 sprintf(dquote_err,"    Line %ld - Mismatched quotes\n",

  2436 		  linecnt);

  2437             if (pswit[SQUOTE_SWITCH] && counters.open_single_quote &&

  2438 	      counters.open_single_quote!=counters.close_single_quote)

  2439                 sprintf(squote_err,"    Line %ld - Mismatched singlequotes?\n",

  2440 		  linecnt);

  2441             if (pswit[SQUOTE_SWITCH] && counters.open_single_quote &&

  2442 	      counters.open_single_quote!=counters.close_single_quote &&

  2443 	      counters.open_single_quote!=counters.close_single_quote+1)

  2444 		/*

  2445 		 * Flag it to be noted regardless of the

  2446 		 * first char of the next para.

  2447 		 */

  2448                 squot=1;

  2449             if (counters.r_brack)

  2450                 sprintf(rbrack_err,"    Line %ld - "

  2451 		  "Mismatched round brackets?\n",linecnt);

  2452             if (counters.s_brack)

  2453                 sprintf(sbrack_err,"    Line %ld - "

  2454 		  "Mismatched square brackets?\n",linecnt);

  2455             if (counters.c_brack)

  2456                 sprintf(cbrack_err,"    Line %ld - "

  2457 		  "Mismatched curly brackets?\n",linecnt);

  2458             if (counters.c_unders%2)

  2459                 sprintf(unders_err,"    Line %ld - Mismatched underscores?\n",

  2460 		  linecnt);

  2461 	    memset(&counters,0,sizeof(counters));

  2462 	    /* let the next iteration know that it's starting a new para */

  2463             isnewpara=1;

  2464 	}

  2465         /*

  2466 	 * Check for omitted punctuation at end of paragraph by working back

  2467 	 * through prevline. DW.

  2468          * Need to check this only for "normal" paras.

  2469          * So what is a "normal" para?

  2470          *    Not normal if one-liner (chapter headings, etc.)

  2471          *    Not normal if doesn't contain at least one locase letter

  2472          *    Not normal if starts with space

  2473 	 */

  2474         if (isemptyline)

  2475 	{

  2476 	    /* end of para */

  2477             for (s=prevline,i=0;*s && !i;s++)

  2478                 if (gcisletter(*s))

  2479 		    /* use i to indicate the presence of a letter on the line */

  2480                     i=1;

  2481             /*

  2482 	     * This next "if" is a problem.

  2483              * If we say "start_para_line <= linecnt - 1", that includes

  2484 	     * one-line "paragraphs" like chapter heads. Lotsa false positives.

  2485              * If we say "start_para_line < linecnt - 1" it doesn't, but then it

  2486              * misses genuine one-line paragraphs.

  2487 	     */

  2488             if (i && last.blen>2 && start_para_line<linecnt-1 &&

  2489 	      *prevline>CHAR_SPACE)

  2490 	    {

  2491                 for (i=strlen(prevline)-1;

  2492 		  (prevline[i]==CHAR_DQUOTE || prevline[i]==CHAR_SQUOTE) &&

  2493 		  prevline[i]>CHAR_SPACE && i>0;

  2494 		  i--)

  2495 		    ;

  2496                 for (;i>0;i--)

  2497 		{

  2498                     if (gcisalpha(prevline[i]))

  2499 		    {

  2500                         if (pswit[ECHO_SWITCH])

  2501 			    printf("\n%s\n",prevline);

  2502                         if (!pswit[OVERVIEW_SWITCH])

  2503                             printf("    Line %ld column %d - "

  2504 			      "No punctuation at para end?\n",

  2505 			      linecnt-1,strlen(prevline));

  2506                         else

  2507                             cnt_punct++;

  2508                         break;

  2509 		    }

  2510                     if (strchr("-.:!([{?}])",prevline[i]))

  2511                         break;

  2512 		}

  2513 	    }

  2514 	}

  2515         strcpy(prevline,aline);

  2516     }

  2517     fclose(infile);

  2518     if (!pswit[OVERVIEW_SWITCH])

  2519         for (i=0;i<MAX_QWORD;i++)

  2520             if (dupcnt[i])

  2521                 printf("\nNote: Queried word %s was duplicated %d time%s\n",

  2522 		  qword[i],dupcnt[i],"s");

  2523 }

  2525 /*

  2526  * flgets:

  2527  *

  2528  * Get one line from the input stream, checking for

  2529  * the existence of exactly one CR/LF line-end per line.

  2530  *

  2531  * Returns: a pointer to the line.

  2532  */

  2533 char *flgets(char *theline,int maxlen,FILE *thefile,long lcnt)

  2534 {

  2535     char c;

  2536     int len,isCR,cint;

  2537     *theline=0;

  2538     len=isCR=0;

  2539     c=cint=fgetc(thefile);

  2540     do

  2541     {

  2542         if (cint==EOF)

  2543             return NULL;

  2544 	/* either way, it's end of line */

  2545         if (c==10)

  2546 	{

  2547             if (isCR)

  2548                 break;

  2549             else

  2550 	    {

  2551 		/* Error - a LF without a preceding CR */

  2552                 if (pswit[LINE_END_SWITCH])

  2553 		{

  2554                     if (pswit[ECHO_SWITCH])

  2555 			printf("\n%s\n",theline);

  2556                     if (!pswit[OVERVIEW_SWITCH])

  2557                         printf("    Line %ld - No CR?\n",lcnt);

  2558                     else

  2559                         cnt_lineend++;

  2560 		}

  2561                 break;

  2562 	    }

  2563 	}

  2564         if (c==13)

  2565 	{

  2566             if (isCR)

  2567 	    {

  2568 		/* Error - two successive CRs */

  2569                 if (pswit[LINE_END_SWITCH])

  2570 		{

  2571                     if (pswit[ECHO_SWITCH])

  2572 			printf("\n%s\n",theline);

  2573                     if (!pswit[OVERVIEW_SWITCH])

  2574                         printf("    Line %ld - Two successive CRs?\n",lcnt);

  2575                     else

  2576                         cnt_lineend++;

  2577 		}

  2578 	    }

  2579             isCR=1;

  2580 	}

  2581         else

  2582 	{

  2583             if (pswit[LINE_END_SWITCH] && isCR)

  2584 	    {

  2585                 if (pswit[ECHO_SWITCH])

  2586 		    printf("\n%s\n",theline);

  2587                 if (!pswit[OVERVIEW_SWITCH])

  2588                     printf("    Line %ld column %d - CR without LF?\n",

  2589 		      lcnt,len+1);

  2590                 else

  2591                     cnt_lineend++;

  2592 	    }

  2593             theline[len]=c;

  2594             len++;

  2595             theline[len]=0;

  2596             isCR=0;

  2597 	}

  2598         c=cint=fgetc(thefile);

  2599     } while(len<maxlen);

  2600     if (pswit[MARKUP_SWITCH])

  2601         postprocess_for_HTML(theline);

  2602     if (pswit[DP_SWITCH])

  2603         postprocess_for_DP(theline);

  2604     return theline;

  2605 }

  2607 /*

  2608  * mixdigit:

  2609  *

  2610  * Takes a "word" as a parameter, and checks whether it

  2611  * contains a mixture of alpha and digits. Generally, this is an

  2612  * error, but may not be for cases like 4th or L5 12s. 3d.

  2613  *

  2614  * Returns: 0 if no error found, 1 if error.

  2615  */

  2616 int mixdigit(char *checkword)

  2617 {

  2618     int wehaveadigit,wehavealetter,firstdigits,query,wl;

  2619     char *s;

  2620     wehaveadigit=wehavealetter=query=0;

  2621     for (s=checkword;*s;s++)

  2622         if (gcisalpha(*s))

  2623             wehavealetter=1;

  2624         else

  2625             if (gcisdigit(*s))

  2626                 wehaveadigit=1;

  2627     if (wehaveadigit && wehavealetter)

  2628     {

  2629 	/* Now exclude common legit cases, like "21st" and "12l. 3s. 11d." */

  2630         query=1;

  2631         wl=strlen(checkword);

  2632         for (firstdigits=0;gcisdigit(checkword[firstdigits]);firstdigits++)

  2633             ;

  2634         /* digits, ending in st, rd, nd, th of either case */

  2635         if (firstdigits+2==wl && (matchword(checkword+wl-2,"st") ||

  2636 	  matchword(checkword+wl-2,"rd") || matchword(checkword+wl-2,"nd") ||

  2637 	  matchword(checkword+wl-2,"th")))

  2638 	    query=0;

  2639         if (firstdigits+3==wl && (matchword(checkword+wl-3,"sts") ||

  2640 	  matchword(checkword+wl-3,"rds") || matchword(checkword+wl-3,"nds") ||

  2641 	  matchword(checkword+wl-3,"ths")))

  2642 	    query=0;

  2643         if (firstdigits+3==wl && (matchword(checkword+wl-4,"stly") ||

  2644 	  matchword(checkword+wl-4,"rdly") ||

  2645 	  matchword(checkword+wl-4,"ndly") || matchword(checkword+wl-4,"thly")))

  2646 	    query=0;

  2647         /* digits, ending in l, L, s or d */

  2648         if (firstdigits+1==wl && (checkword[wl-1]=='l' ||

  2649 	  checkword[wl-1]=='L' || checkword[wl-1]=='s' || checkword[wl-1]=='d'))

  2650 	    query=0;

  2651         /*

  2652 	 * L at the start of a number, representing Britsh pounds, like L500.

  2653          * This is cute. We know the current word is mixeddigit. If the first

  2654          * letter is L, there must be at least one digit following. If both

  2655          * digits and letters follow, we have a genuine error, else we have a

  2656          * capital L followed by digits, and we accept that as a non-error.

  2657 	 */

  2658         if (checkword[0]=='L' && !mixdigit(checkword+1))

  2659 	    query=0;

  2660     }

  2661     return query;

  2662 }

  2664 /*

  2665  * getaword:

  2666  *

  2667  * Extracts the first/next "word" from the line, and puts

  2668  * it into "thisword". A word is defined as one English word unit--or

  2669  * at least that's the aim.

  2670  *

  2671  * Returns: a pointer to the position in the line where we will start

  2672  *          looking for the next word.

  2673  */

  2674 char *getaword(char *fromline,char *thisword)

  2675 {

  2676     int i,wordlen;

  2677     char *s;

  2678     wordlen=0;

  2679     for (;!gcisdigit(*fromline) && !gcisalpha(*fromline) && *fromline;

  2680       fromline++)

  2681 	;

  2682     /*

  2683      * Use a look-ahead to handle exceptions for numbers like 1,000 and 1.35.

  2684      * Especially yucky is the case of L1,000

  2685      * This section looks for a pattern of characters including a digit

  2686      * followed by a comma or period followed by one or more digits.

  2687      * If found, it returns this whole pattern as a word; otherwise we discard

  2688      * the results and resume our normal programming.

  2689      */

  2690     s=fromline;

  2691     for (;(gcisdigit(*s) || gcisalpha(*s) || *s==',' || *s=='.') &&

  2692       wordlen<MAXWORDLEN;s++)

  2693     {

  2694 	thisword[wordlen]=*s;

  2695         wordlen++;

  2696     }

  2697     thisword[wordlen]=0;

  2698     for (i=1;i<wordlen-1;i++)

  2699     {

  2700         if (thisword[i]=='.' || thisword[i]==',')

  2701 	{

  2702             if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i-1]))

  2703 	    {

  2704                 fromline=s;

  2705                 return fromline;

  2706 	    }

  2707 	}

  2708     }

  2709     /* we didn't find a punctuated number - do the regular getword thing */

  2710     wordlen=0;

  2711     for (;(gcisdigit(*fromline) || gcisalpha(*fromline) || *fromline=='\'') &&

  2712       wordlen<MAXWORDLEN;fromline++)

  2713     {

  2714         thisword[wordlen]=*fromline;

  2715         wordlen++;

  2716     }

  2717     thisword[wordlen]=0;

  2718     return fromline;

  2719 }

  2721 /*

  2722  * matchword:

  2723  *

  2724  * A case-insensitive string matcher.

  2725  */

  2726 int matchword(char *checkfor,char *thisword)

  2727 {

  2728     unsigned int ismatch,i;

  2729     if (strlen(checkfor)!=strlen(thisword))

  2730 	return 0;

  2731     ismatch=1;     /* assume a match until we find a difference */

  2732     for (i=0;i<strlen(checkfor);i++)

  2733         if (toupper(checkfor[i])!=toupper(thisword[i]))

  2734             ismatch=0;

  2735     return ismatch;

  2736 }

  2738 /*

  2739  * lowerit:

  2740  *

  2741  * Lowercase the line.

  2742  */

  2744 void lowerit(char *theline)

  2745 {

  2746     for (;*theline;theline++)

  2747         if (*theline>='A' && *theline<='Z')

  2748             *theline+=32;

  2749 }

  2751 /*

  2752  * isroman:

  2753  *

  2754  * Is this word a Roman Numeral?

  2755  *

  2756  * It doesn't actually validate that the number is a valid Roman Numeral--for

  2757  * example it will pass MXXXXXXXXXX as a valid Roman Numeral, but that's not

  2758  * what we're here to do. If it passes this, it LOOKS like a Roman numeral.

  2759  * Anyway, the actual Romans were pretty tolerant of bad arithmetic, or

  2760  * expressions thereof, except when it came to taxes. Allow any number of M,

  2761  * an optional D, an optional CM or CD, any number of optional Cs, an optional

  2762  * XL or an optional XC, an optional IX or IV, an optional V and any number

  2763  * of optional Is.

  2764  */

  2765 int isroman(char *t)

  2766 {

  2767     char *s;

  2768     if (!t || !*t)

  2769 	return 0;

  2770     s=t;

  2771     while (*t=='m' && *t)

  2772 	t++;

  2773     if (*t=='d')

  2774 	t++;

  2775     if (*t=='c' && t[1]=='m')

  2776 	t+=2;

  2777     if (*t=='c' && t[1]=='d')

  2778 	t+=2;

  2779     while (*t=='c' && *t)

  2780 	t++;

  2781     if (*t=='x' && t[1]=='l')

  2782 	t+=2;

  2783     if (*t=='x' && t[1]=='c')

  2784 	t+=2;

  2785     if (*t=='l')

  2786 	t++;

  2787     while (*t=='x' && *t)

  2788 	t++;

  2789     if (*t=='i' && t[1]=='x')

  2790 	t+=2;

  2791     if (*t=='i' && t[1]=='v')

  2792 	t+=2;

  2793     if (*t=='v')

  2794 	t++;

  2795     while (*t=='i' && *t)

  2796 	t++;

  2797     return !*t;

  2798 }

  2800 /*

  2801  * gcisalpha:

  2802  *

  2803  * A version of isalpha() that is somewhat lenient on 8-bit texts.

  2804  * If we use the standard function, 8-bit accented characters break

  2805  * words, so that tete with accented characters appears to be two words, "t"

  2806  * and "t", with 8-bit characters between them. This causes over-reporting of

  2807  * errors. gcisalpha() recognizes accented letters from the CP1252 (Windows)

  2808  * and ISO-8859-1 character sets, which are the most common PG 8-bit types.

  2809  */

  2810 int gcisalpha(unsigned char c)

  2811 {

  2812     if (c>='a' && c<='z')

  2813 	return 1;

  2814     if (c>='A' && c<='Z')

  2815 	return 1;

  2816     if (c<140)

  2817 	return 0;

  2818     if (c>=192 && c!=208 && c!=215 && c!=222 && c!=240 && c!=247 && c!=254)

  2819 	return 1;

  2820     if (c==140 || c==142 || c==156 || c==158 || c==159)

  2821 	return 1;

  2822     return 0;

  2823 }

  2825 /*

  2826  * gcisdigit:

  2827  *

  2828  * A version of isdigit() that doesn't get confused in 8-bit texts.

  2829  */

  2830 int gcisdigit(unsigned char c)

  2831 {

  2832     return c>='0' && c<='9';

  2833 }

  2835 /*

  2836  * gcisletter:

  2837  *

  2838  * A version of isletter() that doesn't get confused in 8-bit texts.

  2839  * NB: this is ISO-8891-1-specific.

  2840  */

  2841 int gcisletter(unsigned char c)

  2842 {

  2843     return c>='A' && c<='Z' || c>='a' && c<='z' || c>=192;

  2844 }

  2846 /*

  2847  * gcstrchr:

  2848  *

  2849  * Wraps strchr to return NULL if the character being searched for is zero.

  2850  */

  2851 char *gcstrchr(char *s,char c)

  2852 {

  2853     if (!c)

  2854 	return NULL;

  2855     return strchr(s,c);

  2856 }

  2858 /*

  2859  * postprocess_for_DP:

  2860  *

  2861  * Invoked with the -d switch from flgets().

  2862  * It simply "removes" from the line a hard-coded set of common

  2863  * DP-specific tags, so that the line passed to the main routine has

  2864  * been pre-cleaned of DP markup.

  2865  */

  2866 void postprocess_for_DP(char *theline)

  2867 {

  2868     char *s,*t;

  2869     int i;

  2870     if (!*theline)

  2871         return;

  2872     for (i=0;*DPmarkup[i];i++)

  2873     {

  2874         s=strstr(theline,DPmarkup[i]);

  2875         while (s)

  2876 	{

  2877             t=s+strlen(DPmarkup[i]);

  2878             while (*t)

  2879 	    {

  2880                 *s=*t;

  2881                 t++;

  2882 		s++;

  2883 	    }

  2884             *s=0;

  2885             s=strstr(theline,DPmarkup[i]);

  2886 	}

  2887     }

  2888 }

  2890 /*

  2891  * postprocess_for_HTML:

  2892  *

  2893  * Invoked with the -m switch from flgets().

  2894  * It simply "removes" from the line a hard-coded set of common

  2895  * HTML tags and "replaces" a hard-coded set of common HTML

  2896  * entities, so that the line passed to the main routine has

  2897  * been pre-cleaned of HTML.

  2898  */

  2899 void postprocess_for_HTML(char *theline)

  2900 {

  2901     if (strstr(theline,"<") && strstr(theline,">"))

  2902         while (losemarkup(theline))

  2903             ;

  2904     while (loseentities(theline))

  2905         ;

  2906 }

  2908 char *losemarkup(char *theline)

  2909 {

  2910     char *s,*t;

  2911     int i;

  2912     if (!*theline)

  2913         return NULL;

  2914     s=strstr(theline,"<");

  2915     t=strstr(theline,">");

  2916     if (!s || !t)

  2917 	return NULL;

  2918     for (i=0;*markup[i];i++)

  2919         if (!tagcomp(s+1,markup[i]))

  2920 	{

  2921             if (!t[1])

  2922 	    {

  2923                 *s=0;

  2924                 return s;

  2925 	    }

  2926             else if (t>s)

  2927 	    {

  2928 		strcpy(s,t+1);

  2929 		return s;

  2930 	    }

  2931         }

  2932     /* It's an unrecognized <xxx>. */

  2933     return NULL;

  2934 }

  2936 char *loseentities(char *theline)

  2937 {

  2938     int i;

  2939     char *s,*t;

  2940     if (!*theline)

  2941         return NULL;

  2942     for (i=0;*entities[i].htmlent;i++)

  2943     {

  2944         s=strstr(theline,entities[i].htmlent);

  2945         if (s)

  2946 	{

  2947             t=malloc((size_t)strlen(s));

  2948             if (!t)

  2949 		return NULL;

  2950             strcpy(t,s+strlen(entities[i].htmlent));

  2951             strcpy(s,entities[i].textent);

  2952             strcat(s,t);

  2953             free(t);

  2954             return theline;

  2955 	}

  2956     }

  2957     for (i=0;*entities[i].htmlnum;i++)

  2958     {

  2959         s=strstr(theline,entities[i].htmlnum);

  2960         if (s)

  2961 	{

  2962             t=malloc((size_t)strlen(s));

  2963             if (!t)

  2964 		return NULL;

  2965             strcpy(t,s+strlen(entities[i].htmlnum));

  2966             strcpy(s,entities[i].textent);

  2967             strcat(s,t);

  2968             free(t);

  2969             return theline;

  2970 	}

  2971     }

  2972     return NULL;

  2973 }

  2975 int tagcomp(char *strin,char *basetag)

  2976 {

  2977     char *s,*t;

  2978     s=basetag;

  2979     t=strin;

  2980     if (*t=='/')

  2981 	t++; /* ignore a slash */

  2982     while (*s && *t)

  2983     {

  2984         if (tolower(*s)!=tolower(*t))

  2985 	    return 1;

  2986         s++;

  2987 	t++;

  2988     }

  2989     return 0;

  2990 }

  2992 void proghelp()

  2993 {

  2994     fputs("Bookloupe version " PACKAGE_VERSION ".\n",stderr);

  2995     fputs("Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>.\n",stderr);

  2996     fputs("Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk>.\n",stderr);

  2997     fputs("Bookloupe comes wih ABSOLUTELY NO WARRANTY. "

  2998       "For details, read the file COPYING.\n",stderr);

  2999     fputs("This is Free Software; "

  3000       "you may redistribute it under certain conditions (GPL);\n",stderr);

  3001     fputs("read the file COPYING for details.\n\n",stderr);

  3002     fputs("Usage is: bookloupe [-setpxloyhud] filename\n",stderr);

  3003     fputs("  where -s checks single quotes, -e suppresses echoing lines, "

  3004       "-t checks typos\n",stderr);

  3005     fputs("  -x (paranoid) switches OFF -t and extra checks, "

  3006       "-l turns OFF line-end checks\n",stderr);

  3007     fputs("  -o just displays overview without detail, "

  3008       "-h echoes header fields\n",stderr);

  3009     fputs("  -v (verbose) unsuppresses duplicate reporting, "

  3010       "-m suppresses markup\n",stderr);

  3011     fputs("  -d ignores DP-specific markup,\n",stderr);

  3012     fputs("  -u uses a file gutcheck.typ to query user-defined "

  3013       "possible typos\n",stderr);

  3014     fputs("Sample usage: bookloupe warpeace.txt \n",stderr);

  3015     fputs("\n",stderr);

  3016     fputs("Bookloupe looks for errors in Project Gutenberg(TM) etexts.\n",

  3017       stderr);

  3018     fputs("Bookloupe queries anything it thinks shouldn't be in a PG text; "

  3019       "non-ASCII\n",stderr);

  3020     fputs("characters like accented letters, "

  3021       "lines longer than 75 or shorter than 55,\n",stderr);

  3022     fputs("unbalanced quotes or brackets, "

  3023       "a variety of badly formatted punctuation, \n",stderr);

  3024     fputs("HTML tags, some likely typos. "

  3025       "It is NOT a substitute for human judgement.\n",stderr);

  3026     fputs("\n",stderr);

  3027 }

author	ali <ali@juiblex.co.uk>
	Sat May 25 19:27:51 2013 +0100 (2013-05-25)
changeset 45	d48f66b0ad0d
parent 44	66483ebc9b56
child 46	aa45307a6328
permissions	-rw-r--r--