1.1 --- a/bookloupe/bookloupe.c Sun Feb 26 09:52:49 2012 +0000
1.2 +++ b/bookloupe/bookloupe.c Fri May 24 22:47:16 2013 +0100
1.3 @@ -1,8 +1,8 @@
1.4 /*************************************************************************/
1.5 -/* gutcheck - check for assorted weirdnesses in a PG candidate text file */
1.6 +/* bookloupe--check for assorted weirdnesses in a PG candidate text file */
1.7 /* */
1.8 -/* Version 0.991 */
1.9 /* Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com> */
1.10 +/* Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk> */
1.11 /* */
1.12 /* This program is free software; you can redistribute it and/or modify */
1.13 /* it under the terms of the GNU General Public License as published by */
1.14 @@ -11,66 +11,13 @@
1.15 /* */
1.16 /* This program is distributed in the hope that it will be useful, */
1.17 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
1.18 -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
1.19 +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
1.20 /* GNU General Public License for more details. */
1.21 /* */
1.22 /* You should have received a copy of the GNU General Public License */
1.23 -/* along with this program; if not, write to the */
1.24 -/* Free Software Foundation, Inc., */
1.25 -/* 59 Temple Place, */
1.26 -/* Suite 330, */
1.27 -/* Boston, MA 02111-1307 USA */
1.28 -/* */
1.29 -/* */
1.30 -/* */
1.31 -/* Overview comments: */
1.32 -/* */
1.33 -/* If you're reading this, you're either interested in how to detect */
1.34 -/* formatting errors, or very very bored. */
1.35 -/* */
1.36 -/* Gutcheck is a homebrew formatting checker specifically for */
1.37 -/* spotting common formatting problems in a PG e-text. I typically */
1.38 -/* run it once or twice on a file I'm about to submit; it usually */
1.39 -/* finds a few formatting problems. It also usually finds lots of */
1.40 -/* queries that aren't problems at all; it _really_ doesn't like */
1.41 -/* the standard PG header, for example. It's optimized for straight */
1.42 -/* prose; poetry and non-fiction involving tables tend to trigger */
1.43 -/* false alarms. */
1.44 -/* */
1.45 -/* The code of gutcheck is not very interesting, but the experience */
1.46 -/* of what constitutes a possible error may be, and the best way to */
1.47 -/* illustrate that is by example. */
1.48 -/* */
1.49 -/* */
1.50 -/* Here are some common typos found in PG texts that gutcheck */
1.51 -/* will flag as errors: */
1.52 -/* */
1.53 -/* "Look!John , over there!" */
1.54 -/* <this is a HTML tag> */
1.55 -/* &so is this; */
1.56 -/* Margaret said: " Now you should start for school." */
1.57 -/* Margaret said: "Now you should start for school. (if end of para) */
1.58 -/* The horse is said to he worth a lot. */
1.59 -/* 0K - this'11 make you look close1y. */
1.60 -/* "If you do. you'll regret it!" */
1.61 -/* */
1.62 -/* There are some complications . The extra space left around that */
1.63 -/* period was an error . . . but that ellipsis wasn't. */
1.64 -/* */
1.65 -/* The last line of a paragraph */
1.66 -/* is usually short. */
1.67 -/* */
1.68 -/* This period is an error.But the periods in a.m. aren't. */
1.69 -/* */
1.70 -/* Checks that are do-able but not (well) implemented are: */
1.71 -/* Single-quote chcking. */
1.72 -/* Despite 3 attempts at it, singlequote checking is still */
1.73 -/* crap in gutcheck. It may not be possible without analysis */
1.74 -/* of the whole paragraph. */
1.75 -/* */
1.76 +/* along with this program. If not, see <http://www.gnu.org/licenses/>. */
1.77 /*************************************************************************/
1.78
1.79 -
1.80 #include <stdio.h>
1.81 #include <stdlib.h>
1.82 #include <string.h>
1.83 @@ -89,216 +36,227 @@
1.84 char aline[LINEBUFSIZE];
1.85 char prevline[LINEBUFSIZE];
1.86
1.87 - /* Common typos. */
1.88 -char *typo[] = { "teh", "th", "og", "fi", "ro", "adn", "yuo", "ot", "fo", "thet", "ane", "nad",
1.89 - "te", "ig", "acn", "ahve", "alot", "anbd", "andt", "awya", "aywa", "bakc", "om",
1.90 - "btu", "byt", "cna", "cxan", "coudl", "dont", "didnt", "couldnt", "wouldnt", "doesnt", "shouldnt", "doign", "ehr",
1.91 - "hmi", "hse", "esle", "eyt", "fitrs", "firts", "foudn", "frmo", "fromt", "fwe", "gaurd", "gerat", "goign",
1.92 - "gruop", "haev", "hda", "hearign", "seeign", "sayign", "herat", "hge", "hsa", "hsi", "hte", "htere",
1.93 - "htese", "htey", "htis", "hvae", "hwich", "idae", "ihs", "iits", "int", "iwll", "iwth", "jsut", "loev",
1.94 - "sefl", "myu", "nkow", "nver", "nwe", "nwo", "ocur", "ohter", "omre", "onyl", "otehr", "otu", "owrk",
1.95 - "owuld", "peice", "peices", "peolpe", "peopel", "perhasp", "perhpas", "pleasent", "poeple", "porblem",
1.96 - "porblems", "rwite", "saidt", "saidh", "saids", "seh", "smae", "smoe", "sohw", "stnad", "stopry",
1.97 - "stoyr", "stpo", "tahn", "taht", "tath", "tehy", "tghe", "tghis", "theri", "theyll", "thgat", "thge",
1.98 - "thier", "thna", "thne", "thnig", "thnigs", "thsi", "thsoe", "thta", "timne", "tirne", "tkae",
1.99 - "tthe", "tyhat", "tyhe", "veyr", "vou", "vour", "vrey", "waht", "wasnt", "awtn", "watn", "wehn", "whic", "whcih",
1.100 - "whihc", "whta", "wihch", "wief", "wiht", "witha", "wiull", "wnat", "wnated", "wnats",
1.101 - "woh", "wohle", "wokr", "woudl", "wriet", "wrod", "wroet", "wroking", "wtih", "wuould", "wya", "yera",
1.102 - "yeras", "yersa", "yoiu", "youve", "ytou", "yuor",
1.103 - /* added h/b words for version 12 - removed a few with "tbe" v.25 */
1.104 - "abead", "ahle", "ahout", "ahove", "altbough", "balf", "bardly", "bas", "bave", "baving", "bebind",
1.105 - "beld", "belp", "belped", "ber", "bere", "bim", "bis", "bome", "bouse", "bowever", "buge", "dehates",
1.106 - "deht", "han", "hecause", "hecome", "heen", "hefore", "hegan", "hegin", "heing",
1.107 - "helieve", "henefit", "hetter", "hetween", "heyond", "hig", "higber", "huild", "huy", "hy", "jobn", "joh",
1.108 - "meanwbile", "memher", "memhers", "numher", "numhers",
1.109 - "perbaps", "prohlem", "puhlic", "witbout",
1.110 - /* and a few more for .18 */
1.111 - "arn", "hin", "hirn", "wrok", "wroked", "amd", "aud", "prornise", "prornised", "modem", "bo",
1.112 - "heside", "chapteb", "chaptee", "se",
1.113 - ""};
1.114 +/* Common typos. */
1.115 +char *typo[] = {
1.116 + "teh", "th", "og", "fi", "ro", "adn", "yuo", "ot", "fo", "thet", "ane",
1.117 + "nad", "te", "ig", "acn", "ahve", "alot", "anbd", "andt", "awya", "aywa",
1.118 + "bakc", "om", "btu", "byt", "cna", "cxan", "coudl", "dont", "didnt",
1.119 + "couldnt", "wouldnt", "doesnt", "shouldnt", "doign", "ehr", "hmi", "hse",
1.120 + "esle", "eyt", "fitrs", "firts", "foudn", "frmo", "fromt", "fwe", "gaurd",
1.121 + "gerat", "goign", "gruop", "haev", "hda", "hearign", "seeign", "sayign",
1.122 + "herat", "hge", "hsa", "hsi", "hte", "htere", "htese", "htey", "htis",
1.123 + "hvae", "hwich", "idae", "ihs", "iits", "int", "iwll", "iwth", "jsut",
1.124 + "loev", "sefl", "myu", "nkow", "nver", "nwe", "nwo", "ocur", "ohter",
1.125 + "omre", "onyl", "otehr", "otu", "owrk", "owuld", "peice", "peices",
1.126 + "peolpe", "peopel", "perhasp", "perhpas", "pleasent", "poeple", "porblem",
1.127 + "porblems", "rwite", "saidt", "saidh", "saids", "seh", "smae", "smoe",
1.128 + "sohw", "stnad", "stopry", "stoyr", "stpo", "tahn", "taht", "tath",
1.129 + "tehy", "tghe", "tghis", "theri", "theyll", "thgat", "thge", "thier",
1.130 + "thna", "thne", "thnig", "thnigs", "thsi", "thsoe", "thta", "timne",
1.131 + "tirne", "tkae", "tthe", "tyhat", "tyhe", "veyr", "vou", "vour", "vrey",
1.132 + "waht", "wasnt", "awtn", "watn", "wehn", "whic", "whcih", "whihc", "whta",
1.133 + "wihch", "wief", "wiht", "witha", "wiull", "wnat", "wnated", "wnats",
1.134 + "woh", "wohle", "wokr", "woudl", "wriet", "wrod", "wroet", "wroking",
1.135 + "wtih", "wuould", "wya", "yera", "yeras", "yersa", "yoiu", "youve",
1.136 + "ytou", "yuor", "abead", "ahle", "ahout", "ahove", "altbough", "balf",
1.137 + "bardly", "bas", "bave", "baving", "bebind", "beld", "belp", "belped",
1.138 + "ber", "bere", "bim", "bis", "bome", "bouse", "bowever", "buge",
1.139 + "dehates", "deht", "han", "hecause", "hecome", "heen", "hefore", "hegan",
1.140 + "hegin", "heing", "helieve", "henefit", "hetter", "hetween", "heyond",
1.141 + "hig", "higber", "huild", "huy", "hy", "jobn", "joh", "meanwbile",
1.142 + "memher", "memhers", "numher", "numhers", "perbaps", "prohlem", "puhlic",
1.143 + "witbout", "arn", "hin", "hirn", "wrok", "wroked", "amd", "aud",
1.144 + "prornise", "prornised", "modem", "bo", "heside", "chapteb", "chaptee",
1.145 + "se", ""
1.146 +};
1.147
1.148 char *usertypo[MAX_USER_TYPOS];
1.149
1.150 - /* Common abbreviations and other OK words not to query as typos. */
1.151 - /* 0.99 last-minute - removed "ms" */
1.152 -char *okword[] = {"mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "hmm", "h'm", "hmmm", "rd", "sh", "br",
1.153 - "pp", "hm", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", "pompeii","hawaii","hawaiian",
1.154 - "hotbed", "heartbeat", "heartbeats", "outbid", "outbids", "frostbite", "frostbitten",
1.155 - ""};
1.156 +/* Common abbreviations and other OK words not to query as typos. */
1.157 +char *okword[] = {
1.158 + "mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "hmm", "h'm", "hmmm",
1.159 + "rd", "sh", "br", "pp", "hm", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd",
1.160 + "pompeii","hawaii","hawaiian", "hotbed", "heartbeat", "heartbeats",
1.161 + "outbid", "outbids", "frostbite", "frostbitten", ""
1.162 +};
1.163
1.164 - /* Common abbreviations that cause otherwise unexplained periods. */
1.165 -char *abbrev[] = {"cent", "cents", "viz", "vol", "vols", "vid", "ed", "al", "etc", "op", "cit",
1.166 - "deg", "min", "chap", "oz", "mme", "mlle", "mssrs",
1.167 - ""};
1.168 - /* Two-Letter combinations that rarely if ever start words, */
1.169 - /* but are common scannos or otherwise common letter */
1.170 - /* combinations. */
1.171 -char *nostart[] = { "hr", "hl", "cb", "sb", "tb", "wb", "tl",
1.172 - "tn", "rn", "lt", "tj",
1.173 - "" };
1.174 +/* Common abbreviations that cause otherwise unexplained periods. */
1.175 +char *abbrev[] = {
1.176 + "cent", "cents", "viz", "vol", "vols", "vid", "ed", "al", "etc", "op",
1.177 + "cit", "deg", "min", "chap", "oz", "mme", "mlle", "mssrs", ""
1.178 +};
1.179
1.180 - /* Two-Letter combinations that rarely if ever end words */
1.181 - /* but are common scannos or otherwise common letter */
1.182 - /* combinations */
1.183 -char *noend[] = { "cb", "gb", "pb", "sb", "tb",
1.184 - "wh","fr","br","qu","tw","gl","fl","sw","gr","sl","cl",
1.185 - "iy",
1.186 - ""};
1.187 +/*
1.188 + * Two-Letter combinations that rarely if ever start words,
1.189 + * but are common scannos or otherwise common letter combinations.
1.190 + */
1.191 +char *nostart[] = {
1.192 + "hr", "hl", "cb", "sb", "tb", "wb", "tl", "tn", "rn", "lt", "tj", ""
1.193 +};
1.194
1.195 -char *markup[] = { "a", "b", "big", "blockquote", "body", "br", "center",
1.196 - "col", "div", "em", "font", "h1", "h2", "h3", "h4",
1.197 - "h5", "h6", "head", "hr", "html", "i", "img", "li",
1.198 - "meta", "ol", "p", "pre", "small", "span", "strong",
1.199 - "sub", "sup", "table", "td", "tfoot", "thead", "title",
1.200 - "tr", "tt", "u", "ul",
1.201 - ""};
1.202 +/*
1.203 + * Two-Letter combinations that rarely if ever end words,
1.204 + * but are common scannos or otherwise common letter combinations.
1.205 + */
1.206 +char *noend[] = {
1.207 + "cb", "gb", "pb", "sb", "tb", "wh", "fr", "br", "qu", "tw", "gl", "fl",
1.208 + "sw", "gr", "sl", "cl", "iy", ""
1.209 +};
1.210
1.211 -char *DPmarkup[] = { "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/", "<tb>",
1.212 - ""}; /* <tb> added .991 */
1.213 +char *markup[] = {
1.214 + "a", "b", "big", "blockquote", "body", "br", "center", "col", "div", "em",
1.215 + "font", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html", "i",
1.216 + "img", "li", "meta", "ol", "p", "pre", "small", "span", "strong", "sub",
1.217 + "sup", "table", "td", "tfoot", "thead", "title", "tr", "tt", "u", "ul", ""
1.218 +};
1.219
1.220 -char *nocomma[] = { "the", "it's", "their", "an", "mrs", "a", "our", "that's",
1.221 - "its", "whose", "every", "i'll", "your", "my",
1.222 - "mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "rd",
1.223 - "pp", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd",
1.224 - "i'm", "during", "let", "toward", "among",
1.225 - ""};
1.226 +char *DPmarkup[] = {
1.227 + "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/", "<tb>", ""
1.228 +};
1.229
1.230 +char *nocomma[] = {
1.231 + "the", "it's", "their", "an", "mrs", "a", "our", "that's", "its", "whose",
1.232 + "every", "i'll", "your", "my", "mr", "mrs", "mss", "mssrs", "ft", "pm",
1.233 + "st", "dr", "rd", "pp", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", "i'm",
1.234 + "during", "let", "toward", "among", ""
1.235 +};
1.236
1.237 -char *noperiod[] = { "every", "i'm", "during", "that's", "their", "your", "our", "my", "or",
1.238 - "and", "but", "as", "if", "the", "its", "it's", "until", "than", "whether",
1.239 - "i'll", "whose", "who", "because", "when", "let", "till", "very",
1.240 - "an", "among", "those", "into", "whom", "having", "thence",
1.241 - ""};
1.242 +char *noperiod[] = {
1.243 + "every", "i'm", "during", "that's", "their", "your", "our", "my", "or",
1.244 + "and", "but", "as", "if", "the", "its", "it's", "until", "than", "whether",
1.245 + "i'll", "whose", "who", "because", "when", "let", "till", "very", "an",
1.246 + "among", "those", "into", "whom", "having", "thence", ""
1.247 +};
1.248
1.249 -
1.250 -char vowels[] = "aeiouàáâãäæèéêëìíîïòóôõöùúûü"; /* Carlo's old suggestion, updated .991 */
1.251 +char vowels[] = "aeiouàáâãäæèéêëìíîïòóôõöùúûü";
1.252
1.253 struct {
1.254 char *htmlent;
1.255 char *htmlnum;
1.256 char *textent;
1.257 - } entities[] = { "&amp;", "&#38;", "&",
1.258 - "&lt;", "&#60;", "<",
1.259 - "&gt;", "&#62;", ">",
1.260 - "&deg;", "&#176;", " degrees",
1.261 - "&pound;", "&#163;", "L",
1.262 - "&quot;", "&#34;", "\"", /* -- quotation mark = APL quote, */
1.263 - "&OElig;", "&#338;", "OE", /* -- latin capital ligature OE, */
1.264 - "&oelig;", "&#339;", "oe", /* -- latin small ligature oe, U+0153 ISOlat2 --> */
1.265 - "&Scaron;", "&#352;", "S", /* -- latin capital letter S with caron, */
1.266 - "&scaron;", "&#353;", "s", /* -- latin small letter s with caron, */
1.267 - "&Yuml;", "&#376;", "Y", /* -- latin capital letter Y with diaeresis, */
1.268 - "&circ;", "&#710;", "", /* -- modifier letter circumflex accent, */
1.269 - "&tilde;", "&#732;", "~", /* -- small tilde, U+02DC ISOdia --> */
1.270 - "&ensp;", "&#8194;", " ", /* -- en space, U+2002 ISOpub --> */
1.271 - "&emsp;", "&#8195;", " ", /* -- em space, U+2003 ISOpub --> */
1.272 - "&thinsp;", "&#8201;", " ", /* -- thin space, U+2009 ISOpub --> */
1.273 - "&ndash;", "&#8211;", "-", /* -- en dash, U+2013 ISOpub --> */
1.274 - "&mdash;", "&#8212;", "--", /* -- em dash, U+2014 ISOpub --> */
1.275 - "&lsquo;", "&#8216;", "'", /* -- left single quotation mark, */
1.276 - "&rsquo;", "&#8217;", "'", /* -- right single quotation mark, */
1.277 - "&sbquo;", "&#8218;", "'", /* -- single low-9 quotation mark, U+201A NEW --> */
1.278 - "&ldquo;", "&#8220;", "\"", /* -- left double quotation mark, */
1.279 - "&rdquo;", "&#8221;", "\"", /* -- right double quotation mark, */
1.280 - "&bdquo;", "&#8222;", "\"", /* -- double low-9 quotation mark, U+201E NEW --> */
1.281 - "&lsaquo;", "&#8249;", "\"", /* -- single left-pointing angle quotation mark, */
1.282 - "&rsaquo;", "&#8250;", "\"", /* -- single right-pointing angle quotation mark, */
1.283 - "&nbsp;", "&#160;", " ", /* -- no-break space = non-breaking space, */
1.284 - "&iexcl;", "&#161;", "!", /* -- inverted exclamation mark, U+00A1 ISOnum --> */
1.285 - "&cent;", "&#162;", "c", /* -- cent sign, U+00A2 ISOnum --> */
1.286 - "&pound;", "&#163;", "L", /* -- pound sign, U+00A3 ISOnum --> */
1.287 - "&curren;", "&#164;", "$", /* -- currency sign, U+00A4 ISOnum --> */
1.288 - "&yen;", "&#165;", "Y", /* -- yen sign = yuan sign, U+00A5 ISOnum --> */
1.289 - "&sect;", "&#167;", "--", /* -- section sign, U+00A7 ISOnum --> */
1.290 - "&uml;", "&#168;", " ", /* -- diaeresis = spacing diaeresis, */
1.291 - "&copy;", "&#169;", "(C) ", /* -- copyright sign, U+00A9 ISOnum --> */
1.292 - "&ordf;", "&#170;", " ", /* -- feminine ordinal indicator, U+00AA ISOnum --> */
1.293 - "&laquo;", "&#171;", "\"", /* -- left-pointing double angle quotation mark */
1.294 - "&shy;", "&#173;", "-", /* -- soft hyphen = discretionary hyphen, */
1.295 - "&reg;", "&#174;", "(R) ", /* -- registered sign = registered trade mark sign, */
1.296 - "&macr;", "&#175;", " ", /* -- macron = spacing macron = overline */
1.297 - "&deg;", "&#176;", " degrees", /* -- degree sign, U+00B0 ISOnum --> */
1.298 - "&plusmn;", "&#177;", "+-", /* -- plus-minus sign = plus-or-minus sign, */
1.299 - "&sup2;", "&#178;", "2", /* -- superscript two = superscript digit two */
1.300 - "&sup3;", "&#179;", "3", /* -- superscript three = superscript digit three */
1.301 - "&acute;", "&#180;", " ", /* -- acute accent = spacing acute, */
1.302 - "&micro;", "&#181;", "m", /* -- micro sign, U+00B5 ISOnum --> */
1.303 - "&para;", "&#182;", "--", /* -- pilcrow sign = paragraph sign, */
1.304 - "&cedil;", "&#184;", " ", /* -- cedilla = spacing cedilla, U+00B8 ISOdia --> */
1.305 - "&sup1;", "&#185;", "1", /* -- superscript one = superscript digit one, */
1.306 - "&ordm;", "&#186;", " ", /* -- masculine ordinal indicator, */
1.307 - "&raquo;", "&#187;", "\"", /* -- right-pointing double angle quotation mark */
1.308 - "&frac14;", "&#188;", "1/4", /* -- vulgar fraction one quarter */
1.309 - "&frac12;", "&#189;", "1/2", /* -- vulgar fraction one half */
1.310 - "&frac34;", "&#190;", "3/4", /* -- vulgar fraction three quarters */
1.311 - "&iquest;", "&#191;", "?", /* -- inverted question mark */
1.312 - "&Agrave;", "&#192;", "A", /* -- latin capital letter A with grave */
1.313 - "&Aacute;", "&#193;", "A", /* -- latin capital letter A with acute, */
1.314 - "&Acirc;", "&#194;", "A", /* -- latin capital letter A with circumflex, */
1.315 - "&Atilde;", "&#195;", "A", /* -- latin capital letter A with tilde, */
1.316 - "&Auml;", "&#196;", "A", /* -- latin capital letter A with diaeresis, */
1.317 - "&Aring;", "&#197;", "A", /* -- latin capital letter A with ring above */
1.318 - "&AElig;", "&#198;", "AE", /* -- latin capital letter AE */
1.319 - "&Ccedil;", "&#199;", "C", /* -- latin capital letter C with cedilla, */
1.320 - "&Egrave;", "&#200;", "E", /* -- latin capital letter E with grave, */
1.321 - "&Eacute;", "&#201;", "E", /* -- latin capital letter E with acute, */
1.322 - "&Ecirc;", "&#202;", "E", /* -- latin capital letter E with circumflex, */
1.323 - "&Euml;", "&#203;", "E", /* -- latin capital letter E with diaeresis, */
1.324 - "&Igrave;", "&#204;", "I", /* -- latin capital letter I with grave, */
1.325 - "&Iacute;", "&#205;", "I", /* -- latin capital letter I with acute, */
1.326 - "&Icirc;", "&#206;", "I", /* -- latin capital letter I with circumflex, */
1.327 - "&Iuml;", "&#207;", "I", /* -- latin capital letter I with diaeresis, */
1.328 - "&ETH;", "&#208;", "E", /* -- latin capital letter ETH, U+00D0 ISOlat1 --> */
1.329 - "&Ntilde;", "&#209;", "N", /* -- latin capital letter N with tilde, */
1.330 - "&Ograve;", "&#210;", "O", /* -- latin capital letter O with grave, */
1.331 - "&Oacute;", "&#211;", "O", /* -- latin capital letter O with acute, */
1.332 - "&Ocirc;", "&#212;", "O", /* -- latin capital letter O with circumflex, */
1.333 - "&Otilde;", "&#213;", "O", /* -- latin capital letter O with tilde, */
1.334 - "&Ouml;", "&#214;", "O", /* -- latin capital letter O with diaeresis, */
1.335 - "&times;", "&#215;", "*", /* -- multiplication sign, U+00D7 ISOnum --> */
1.336 - "&Oslash;", "&#216;", "O", /* -- latin capital letter O with stroke */
1.337 - "&Ugrave;", "&#217;", "U", /* -- latin capital letter U with grave, */
1.338 - "&Uacute;", "&#218;", "U", /* -- latin capital letter U with acute, */
1.339 - "&Ucirc;", "&#219;", "U", /* -- latin capital letter U with circumflex, */
1.340 - "&Uuml;", "&#220;", "U", /* -- latin capital letter U with diaeresis, */
1.341 - "&Yacute;", "&#221;", "Y", /* -- latin capital letter Y with acute, */
1.342 - "&THORN;", "&#222;", "TH", /* -- latin capital letter THORN, */
1.343 - "&szlig;", "&#223;", "sz", /* -- latin small letter sharp s = ess-zed, */
1.344 - "&agrave;", "&#224;", "a", /* -- latin small letter a with grave */
1.345 - "&aacute;", "&#225;", "a", /* -- latin small letter a with acute, */
1.346 - "&acirc;", "&#226;", "a", /* -- latin small letter a with circumflex, */
1.347 - "&atilde;", "&#227;", "a", /* -- latin small letter a with tilde, */
1.348 - "&auml;", "&#228;", "a", /* -- latin small letter a with diaeresis, */
1.349 - "&aring;", "&#229;", "a", /* -- latin small letter a with ring above */
1.350 - "&aelig;", "&#230;", "ae", /* -- latin small letter ae */
1.351 - "&ccedil;", "&#231;", "c", /* -- latin small letter c with cedilla, */
1.352 - "&egrave;", "&#232;", "e", /* -- latin small letter e with grave, */
1.353 - "&eacute;", "&#233;", "e", /* -- latin small letter e with acute, */
1.354 - "&ecirc;", "&#234;", "e", /* -- latin small letter e with circumflex, */
1.355 - "&euml;", "&#235;", "e", /* -- latin small letter e with diaeresis, */
1.356 - "&igrave;", "&#236;", "i", /* -- latin small letter i with grave, */
1.357 - "&iacute;", "&#237;", "i", /* -- latin small letter i with acute, */
1.358 - "&icirc;", "&#238;", "i", /* -- latin small letter i with circumflex, */
1.359 - "&iuml;", "&#239;", "i", /* -- latin small letter i with diaeresis, */
1.360 - "&eth;", "&#240;", "eth", /* -- latin small letter eth, U+00F0 ISOlat1 --> */
1.361 - "&ntilde;", "&#241;", "n", /* -- latin small letter n with tilde, */
1.362 - "&ograve;", "&#242;", "o", /* -- latin small letter o with grave, */
1.363 - "&oacute;", "&#243;", "o", /* -- latin small letter o with acute, */
1.364 - "&ocirc;", "&#244;", "o", /* -- latin small letter o with circumflex, */
1.365 - "&otilde;", "&#245;", "o", /* -- latin small letter o with tilde, */
1.366 - "&ouml;", "&#246;", "o", /* -- latin small letter o with diaeresis, */
1.367 - "&divide;", "&#247;", "/", /* -- division sign, U+00F7 ISOnum --> */
1.368 - "&oslash;", "&#248;", "o", /* -- latin small letter o with stroke, */
1.369 - "&ugrave;", "&#249;", "u", /* -- latin small letter u with grave, */
1.370 - "&uacute;", "&#250;", "u", /* -- latin small letter u with acute, */
1.371 - "&ucirc;", "&#251;", "u", /* -- latin small letter u with circumflex, */
1.372 - "&uuml;", "&#252;", "u", /* -- latin small letter u with diaeresis, */
1.373 - "&yacute;", "&#253;", "y", /* -- latin small letter y with acute, */
1.374 - "&thorn;", "&#254;", "th", /* -- latin small letter thorn, */
1.375 - "&yuml;", "&#255;", "y", /* -- latin small letter y with diaeresis, */
1.376 - "", "" };
1.377 -
1.378 -/* ---- list of special characters ---- */
1.379 +} entities[] = {
1.380 + "&amp;", "&#38;", "&",
1.381 + "&lt;", "&#60;", "<",
1.382 + "&gt;", "&#62;", ">",
1.383 + "&deg;", "&#176;", " degrees",
1.384 + "&pound;", "&#163;", "L",
1.385 + "&quot;", "&#34;", "\"", /* quotation mark = APL quote */
1.386 + "&OElig;", "&#338;", "OE", /* latin capital ligature OE */
1.387 + "&oelig;", "&#339;", "oe", /* latin small ligature oe */
1.388 + "&Scaron;", "&#352;", "S", /* latin capital letter S with caron */
1.389 + "&scaron;", "&#353;", "s", /* latin small letter s with caron */
1.390 + "&Yuml;", "&#376;", "Y", /* latin capital letter Y with diaeresis */
1.391 + "&circ;", "&#710;", "", /* modifier letter circumflex accent */
1.392 + "&tilde;", "&#732;", "~", /* small tilde, U+02DC ISOdia */
1.393 + "&ensp;", "&#8194;", " ", /* en space, U+2002 ISOpub */
1.394 + "&emsp;", "&#8195;", " ", /* em space, U+2003 ISOpub */
1.395 + "&thinsp;", "&#8201;", " ", /* thin space, U+2009 ISOpub */
1.396 + "&ndash;", "&#8211;", "-", /* en dash, U+2013 ISOpub */
1.397 + "&mdash;", "&#8212;", "--", /* em dash, U+2014 ISOpub */
1.398 + "&rsquo;", "&#8217;", "'", /* right single quotation mark */
1.399 + "&sbquo;", "&#8218;", "'", /* single low-9 quotation mark */
1.400 + "&ldquo;", "&#8220;", "\"", /* left double quotation mark */
1.401 + "&rdquo;", "&#8221;", "\"", /* right double quotation mark */
1.402 + "&bdquo;", "&#8222;", "\"", /* double low-9 quotation mark */
1.403 + "&lsaquo;", "&#8249;", "\"", /* single left-pointing angle quotation mark */
1.404 + "&rsaquo;", "&#8250;", "\"", /* single right-pointing angle quotation mark */
1.405 + "&nbsp;", "&#160;", " ", /* no-break space = non-breaking space, */
1.406 + "&iexcl;", "&#161;", "!", /* inverted exclamation mark */
1.407 + "&cent;", "&#162;", "c", /* cent sign */
1.408 + "&pound;", "&#163;", "L", /* pound sign */
1.409 + "&curren;", "&#164;", "$", /* currency sign */
1.410 + "&yen;", "&#165;", "Y", /* yen sign = yuan sign */
1.411 + "&sect;", "&#167;", "--", /* section sign */
1.412 + "&uml;", "&#168;", " ", /* diaeresis = spacing diaeresis */
1.413 + "&copy;", "&#169;", "(C) ", /* copyright sign */
1.414 + "&ordf;", "&#170;", " ", /* feminine ordinal indicator */
1.415 + "&laquo;", "&#171;", "\"", /* left-pointing double angle quotation mark */
1.416 + "&shy;", "&#173;", "-", /* soft hyphen = discretionary hyphen */
1.417 + "&reg;", "&#174;", "(R) ", /* registered sign = registered trade mark sign */
1.418 + "&macr;", "&#175;", " ", /* macron = spacing macron = overline */
1.419 + "&deg;", "&#176;", " degrees", /* degree sign */
1.420 + "&plusmn;", "&#177;", "+-", /* plus-minus sign = plus-or-minus sign */
1.421 + "&sup2;", "&#178;", "2", /* superscript two = superscript digit two */
1.422 + "&sup3;", "&#179;", "3", /* superscript three = superscript digit three */
1.423 + "&acute;", "&#180;", " ", /* acute accent = spacing acute */
1.424 + "&micro;", "&#181;", "m", /* micro sign */
1.425 + "&para;", "&#182;", "--", /* pilcrow sign = paragraph sign */
1.426 + "&cedil;", "&#184;", " ", /* cedilla = spacing cedilla */
1.427 + "&sup1;", "&#185;", "1", /* superscript one = superscript digit one */
1.428 + "&ordm;", "&#186;", " ", /* masculine ordinal indicator */
1.429 + "&raquo;", "&#187;", "\"", /* right-pointing double angle quotation mark */
1.430 + "&frac14;", "&#188;", "1/4", /* vulgar fraction one quarter */
1.431 + "&frac12;", "&#189;", "1/2", /* vulgar fraction one half */
1.432 + "&frac34;", "&#190;", "3/4", /* vulgar fraction three quarters */
1.433 + "&iquest;", "&#191;", "?", /* inverted question mark */
1.434 + "&Agrave;", "&#192;", "A", /* latin capital letter A with grave */
1.435 + "&Aacute;", "&#193;", "A", /* latin capital letter A with acute */
1.436 + "&Acirc;", "&#194;", "A", /* latin capital letter A with circumflex */
1.437 + "&Atilde;", "&#195;", "A", /* latin capital letter A with tilde */
1.438 + "&Auml;", "&#196;", "A", /* latin capital letter A with diaeresis */
1.439 + "&Aring;", "&#197;", "A", /* latin capital letter A with ring above */
1.440 + "&AElig;", "&#198;", "AE", /* latin capital letter AE */
1.441 + "&Ccedil;", "&#199;", "C", /* latin capital letter C with cedilla */
1.442 + "&Egrave;", "&#200;", "E", /* latin capital letter E with grave */
1.443 + "&Eacute;", "&#201;", "E", /* latin capital letter E with acute */
1.444 + "&Ecirc;", "&#202;", "E", /* latin capital letter E with circumflex */
1.445 + "&Euml;", "&#203;", "E", /* latin capital letter E with diaeresis */
1.446 + "&Igrave;", "&#204;", "I", /* latin capital letter I with grave */
1.447 + "&Iacute;", "&#205;", "I", /* latin capital letter I with acute */
1.448 + "&Icirc;", "&#206;", "I", /* latin capital letter I with circumflex */
1.449 + "&Iuml;", "&#207;", "I", /* latin capital letter I with diaeresis */
1.450 + "&ETH;", "&#208;", "E", /* latin capital letter ETH */
1.451 + "&Ntilde;", "&#209;", "N", /* latin capital letter N with tilde */
1.452 + "&Ograve;", "&#210;", "O", /* latin capital letter O with grave */
1.453 + "&Oacute;", "&#211;", "O", /* latin capital letter O with acute */
1.454 + "&Ocirc;", "&#212;", "O", /* latin capital letter O with circumflex */
1.455 + "&Otilde;", "&#213;", "O", /* latin capital letter O with tilde */
1.456 + "&Ouml;", "&#214;", "O", /* latin capital letter O with diaeresis */
1.457 + "&times;", "&#215;", "*", /* multiplication sign */
1.458 + "&Oslash;", "&#216;", "O", /* latin capital letter O with stroke */
1.459 + "&Ugrave;", "&#217;", "U", /* latin capital letter U with grave */
1.460 + "&Uacute;", "&#218;", "U", /* latin capital letter U with acute */
1.461 + "&Ucirc;", "&#219;", "U", /* latin capital letter U with circumflex */
1.462 + "&Uuml;", "&#220;", "U", /* latin capital letter U with diaeresis */
1.463 + "&Yacute;", "&#221;", "Y", /* latin capital letter Y with acute */
1.464 + "&THORN;", "&#222;", "TH", /* latin capital letter THORN */
1.465 + "&szlig;", "&#223;", "sz", /* latin small letter sharp s = ess-zed */
1.466 + "&agrave;", "&#224;", "a", /* latin small letter a with grave */
1.467 + "&aacute;", "&#225;", "a", /* latin small letter a with acute */
1.468 + "&acirc;", "&#226;", "a", /* latin small letter a with circumflex */
1.469 + "&atilde;", "&#227;", "a", /* latin small letter a with tilde */
1.470 + "&auml;", "&#228;", "a", /* latin small letter a with diaeresis */
1.471 + "&aring;", "&#229;", "a", /* latin small letter a with ring above */
1.472 + "&aelig;", "&#230;", "ae", /* latin small letter ae */
1.473 + "&ccedil;", "&#231;", "c", /* latin small letter c with cedilla */
1.474 + "&egrave;", "&#232;", "e", /* latin small letter e with grave */
1.475 + "&eacute;", "&#233;", "e", /* latin small letter e with acute */
1.476 + "&ecirc;", "&#234;", "e", /* latin small letter e with circumflex */
1.477 + "&euml;", "&#235;", "e", /* latin small letter e with diaeresis */
1.478 + "&igrave;", "&#236;", "i", /* latin small letter i with grave */
1.479 + "&iacute;", "&#237;", "i", /* latin small letter i with acute */
1.480 + "&icirc;", "&#238;", "i", /* latin small letter i with circumflex */
1.481 + "&iuml;", "&#239;", "i", /* latin small letter i with diaeresis */
1.482 + "&eth;", "&#240;", "eth", /* latin small letter eth */
1.483 + "&ntilde;", "&#241;", "n", /* latin small letter n with tilde */
1.484 + "&ograve;", "&#242;", "o", /* latin small letter o with grave */
1.485 + "&oacute;", "&#243;", "o", /* latin small letter o with acute */
1.486 + "&ocirc;", "&#244;", "o", /* latin small letter o with circumflex */
1.487 + "&otilde;", "&#245;", "o", /* latin small letter o with tilde */
1.488 + "&ouml;", "&#246;", "o", /* latin small letter o with diaeresis */
1.489 + "&divide;", "&#247;", "/", /* division sign */
1.490 + "&oslash;", "&#248;", "o", /* latin small letter o with stroke */
1.491 + "&ugrave;", "&#249;", "u", /* latin small letter u with grave */
1.492 + "&uacute;", "&#250;", "u", /* latin small letter u with acute */
1.493 + "&ucirc;", "&#251;", "u", /* latin small letter u with circumflex */
1.494 + "&uuml;", "&#252;", "u", /* latin small letter u with diaeresis */
1.495 + "&yacute;", "&#253;", "y", /* latin small letter y with acute */
1.496 + "&thorn;", "&#254;", "th", /* latin small letter thorn */
1.497 + "&yuml;", "&#255;", "y", /* latin small letter y with diaeresis */
1.498 + "", ""
1.499 +};
1.500 +
1.501 +/* special characters */
1.502 #define CHAR_SPACE 32
1.503 #define CHAR_TAB 9
1.504 #define CHAR_LF 10
1.505 @@ -319,11 +277,7 @@
1.506 #define CHAR_OPEN_SBRACK '['
1.507 #define CHAR_CLOSE_SBRACK ']'
1.508
1.509 -
1.510 -
1.511 -
1.512 -
1.513 -/* ---- longest and shortest normal PG line lengths ----*/
1.514 +/* longest and shortest normal PG line lengths */
1.515 #define LONGEST_PG_LINE 75
1.516 #define WAY_TOO_LONG 80
1.517 #define SHORTEST_PG_LINE 55
1.518 @@ -369,8 +323,6 @@
1.519 #define USERTYPO_SWITCH 12
1.520 #define DP_SWITCH 13
1.521
1.522 -
1.523 -
1.524 long cnt_dquot; /* for overview mode, count of doublequote queries */
1.525 long cnt_squot; /* for overview mode, count of singlequote queries */
1.526 long cnt_brack; /* for overview mode, count of brackets queries */
1.527 @@ -383,9 +335,9 @@
1.528 long cnt_word; /* for overview mode, count of word queries */
1.529 long cnt_html; /* for overview mode, count of html queries */
1.530 long cnt_lineend; /* for overview mode, count of line-end queries */
1.531 -long cnt_spacend; /* count of lines with space at end V .21 */
1.532 +long cnt_spacend; /* count of lines with space at end */
1.533 long linecnt; /* count of total lines in the file */
1.534 -long checked_linecnt; /* count of lines actually gutchecked V .26 */
1.535 +long checked_linecnt; /* count of lines actually checked */
1.536
1.537 void proghelp(void);
1.538 void procfile(char *);
1.539 @@ -405,18 +357,18 @@
1.540 char running_from[MAX_PATH];
1.541
1.542 int mixdigit(char *);
1.543 -char *getaword(char *, char *);
1.544 -int matchword(char *, char *);
1.545 -char *flgets(char *, int, FILE *, long);
1.546 +char *getaword(char *,char *);
1.547 +int matchword(char *,char *);
1.548 +char *flgets(char *,int,FILE *,long);
1.549 void lowerit(char *);
1.550 int gcisalpha(unsigned char);
1.551 int gcisdigit(unsigned char);
1.552 int gcisletter(unsigned char);
1.553 -char *gcstrchr(char *s, char c);
1.554 +char *gcstrchr(char *s,char c);
1.555 void postprocess_for_HTML(char *);
1.556 char *linehasmarkup(char *);
1.557 char *losemarkup(char *);
1.558 -int tagcomp(char *, char *);
1.559 +int tagcomp(char *,char *);
1.560 char *loseentities(char *);
1.561 int isroman(char *);
1.562 int usertypo_count;
1.563 @@ -424,2080 +376,2469 @@
1.564
1.565 char wrk[LINEBUFSIZE];
1.566
1.567 -/* This is disgustingly lazy, predefining max words & lengths, */
1.568 -/* but now I'm out of 16-bit restrictions, what's a couple of K? */
1.569 -#define MAX_QWORD 50
1.570 -#define MAX_QWORD_LENGTH 40
1.571 +#define MAX_QWORD 50
1.572 +#define MAX_QWORD_LENGTH 40
1.573 char qword[MAX_QWORD][MAX_QWORD_LENGTH];
1.574 char qperiod[MAX_QWORD][MAX_QWORD_LENGTH];
1.575 signed int dupcnt[MAX_QWORD];
1.576
1.577 -
1.578 -
1.579 -
1.580 -int main(int argc, char **argv)
1.581 +int main(int argc,char **argv)
1.582 {
1.583 - char *argsw, *s;
1.584 - int i, switno, invarg;
1.585 + char *argsw,*s;
1.586 + int i,switno,invarg;
1.587 char usertypo_file[MAX_PATH];
1.588 FILE *usertypofile;
1.589 -
1.590 -
1.591 - if (strlen(argv[0]) < sizeof(running_from))
1.592 - strcpy(running_from, argv[0]); /* save the path to the executable gutcheck */
1.593 -
1.594 + if (strlen(argv[0])<sizeof(running_from))
1.595 + /* save the path to the executable */
1.596 + strcpy(running_from,argv[0]);
1.597 /* find out what directory we're running from */
1.598 - for (s = running_from + strlen(running_from); *s != '/' && *s != '\\' && s >= running_from; s--)
1.599 - *s = 0;
1.600 -
1.601 -
1.602 - switno = strlen(SWITCHES);
1.603 - for (i = switno ; --i >0 ; )
1.604 - pswit[i] = 0; /* initialise switches */
1.605 -
1.606 - /* Standard loop to extract switches. */
1.607 - /* When we come out of this loop, the arguments will be */
1.608 - /* in argv[0] upwards and the switches used will be */
1.609 - /* represented by their equivalent elements in pswit[] */
1.610 - while ( --argc > 0 && **++argv == '-')
1.611 - for (argsw = argv[0]+1; *argsw !='\0'; argsw++)
1.612 - for (i = switno, invarg = 1; (--i >= 0) && invarg == 1 ; )
1.613 - if ((toupper(*argsw)) == SWITCHES[i] ) {
1.614 - invarg = 0;
1.615 - pswit[i] = 1;
1.616 - }
1.617 -
1.618 - pswit[PARANOID_SWITCH] ^= 1; /* Paranoid checking is turned OFF, not on, by its switch */
1.619 -
1.620 - if (pswit[PARANOID_SWITCH]) { /* if running in paranoid mode */
1.621 - pswit[TYPO_SWITCH] = pswit[TYPO_SWITCH] ^ 1; /* force typo checks as well */
1.622 - } /* v.20 removed s and p switches from paranoid mode */
1.623 -
1.624 - pswit[LINE_END_SWITCH] ^= 1; /* Line-end checking is turned OFF, not on, by its switch */
1.625 - pswit[ECHO_SWITCH] ^= 1; /* V.21 Echoing is turned OFF, not on, by its switch */
1.626 -
1.627 - if (pswit[OVERVIEW_SWITCH]) /* just print summary; don't echo */
1.628 - pswit[ECHO_SWITCH] = 0;
1.629 -
1.630 - /* Web uploads - for the moment, this is really just a placeholder */
1.631 - /* until we decide what processing we really want to do on web uploads */
1.632 - if (pswit[WEB_SWITCH]) { /* specific override for web uploads */
1.633 - pswit[ECHO_SWITCH] = 1;
1.634 - pswit[SQUOTE_SWITCH] = 0;
1.635 - pswit[TYPO_SWITCH] = 1;
1.636 - pswit[QPARA_SWITCH] = 0;
1.637 - pswit[PARANOID_SWITCH] = 1;
1.638 - pswit[LINE_END_SWITCH] = 0;
1.639 - pswit[OVERVIEW_SWITCH] = 0;
1.640 - pswit[STDOUT_SWITCH] = 0;
1.641 - pswit[HEADER_SWITCH] = 1;
1.642 - pswit[VERBOSE_SWITCH] = 0;
1.643 - pswit[MARKUP_SWITCH] = 0;
1.644 - pswit[USERTYPO_SWITCH] = 0;
1.645 - pswit[DP_SWITCH] = 0;
1.646 - }
1.647 -
1.648 -
1.649 - if (argc < MINARGS || argc > MAXARGS) { /* check number of args */
1.650 + s=running_from+strlen(running_from);
1.651 + for (;*s!='/' && *s!='\\' && s>=running_from;s--)
1.652 + *s=0;
1.653 + switno=strlen(SWITCHES);
1.654 + for (i=switno;--i>0;)
1.655 + pswit[i]=0; /* initialise switches */
1.656 + /*
1.657 + * Standard loop to extract switches.
1.658 + * When we come out of this loop, the arguments will be
1.659 + * in argv[0] upwards and the switches used will be
1.660 + * represented by their equivalent elements in pswit[]
1.661 + */
1.662 + while (--argc>0 && **++argv=='-')
1.663 + for (argsw=argv[0]+1;*argsw!='\0';argsw++)
1.664 + for (i=switno,invarg=1;(--i>=0) && invarg==1;)
1.665 + if ((toupper(*argsw))==SWITCHES[i])
1.666 + {
1.667 + invarg=0;
1.668 + pswit[i]=1;
1.669 + }
1.670 + /* Paranoid checking is turned OFF, not on, by its switch */
1.671 + pswit[PARANOID_SWITCH]^=1;
1.672 + if (pswit[PARANOID_SWITCH])
1.673 + /* if running in paranoid mode force typo checks as well */
1.674 + pswit[TYPO_SWITCH]=pswit[TYPO_SWITCH]^1;
1.675 + /* Line-end checking is turned OFF, not on, by its switch */
1.676 + pswit[LINE_END_SWITCH]^=1;
1.677 + /* Echoing is turned OFF, not on, by its switch */
1.678 + pswit[ECHO_SWITCH]^=1;
1.679 + if (pswit[OVERVIEW_SWITCH])
1.680 + /* just print summary; don't echo */
1.681 + pswit[ECHO_SWITCH]=0;
1.682 + /*
1.683 + * Web uploads - for the moment, this is really just a placeholder
1.684 + * until we decide what processing we really want to do on web uploads
1.685 + */
1.686 + if (pswit[WEB_SWITCH])
1.687 + {
1.688 + /* specific override for web uploads */
1.689 + pswit[ECHO_SWITCH]=1;
1.690 + pswit[SQUOTE_SWITCH]=0;
1.691 + pswit[TYPO_SWITCH]=1;
1.692 + pswit[QPARA_SWITCH]=0;
1.693 + pswit[PARANOID_SWITCH]=1;
1.694 + pswit[LINE_END_SWITCH]=0;
1.695 + pswit[OVERVIEW_SWITCH]=0;
1.696 + pswit[STDOUT_SWITCH]=0;
1.697 + pswit[HEADER_SWITCH]=1;
1.698 + pswit[VERBOSE_SWITCH]=0;
1.699 + pswit[MARKUP_SWITCH]=0;
1.700 + pswit[USERTYPO_SWITCH]=0;
1.701 + pswit[DP_SWITCH]=0;
1.702 + }
1.703 + if (argc<MINARGS || argc>MAXARGS)
1.704 + {
1.705 + /* check number of args */
1.706 proghelp();
1.707 - return(1); /* exit */
1.708 - }
1.709 -
1.710 -
1.711 + return 1;
1.712 + }
1.713 /* read in the user-defined stealth scanno list */
1.714 -
1.715 - if (pswit[USERTYPO_SWITCH]) { /* ... we were told we had one! */
1.716 - if ((usertypofile = fopen(USERTYPO_FILE, "rb")) == NULL) { /* not in cwd. try gutcheck directory. */
1.717 - strcpy(usertypo_file, running_from);
1.718 - strcat(usertypo_file, USERTYPO_FILE);
1.719 - if ((usertypofile = fopen(usertypo_file, "rb")) == NULL) { /* we ain't got no user typo file! */
1.720 - printf(" --> I couldn't find gutcheck.typ -- proceeding without user typos.\n");
1.721 - }
1.722 - }
1.723 -
1.724 - usertypo_count = 0;
1.725 - if (usertypofile) { /* we managed to open a User Typo File! */
1.726 - if (pswit[USERTYPO_SWITCH]) {
1.727 - while (flgets(aline, LINEBUFSIZE-1, usertypofile, (long)usertypo_count)) {
1.728 - if (strlen(aline) > 1) {
1.729 - if ((int)*aline > 33) {
1.730 - s = malloc(strlen(aline)+1);
1.731 - if (!s) {
1.732 - fprintf(stderr, "gutcheck: cannot get enough memory for user typo file!!\n");
1.733 + if (pswit[USERTYPO_SWITCH])
1.734 + {
1.735 + /* ... we were told we had one! */
1.736 + usertypofile=fopen(USERTYPO_FILE,"rb");
1.737 + if (!usertypofile)
1.738 + {
1.739 + /* not in cwd. try executable directory. */
1.740 + strcpy(usertypo_file,running_from);
1.741 + strcat(usertypo_file,USERTYPO_FILE);
1.742 + usertypofile=fopen(usertypo_file,"rb");
1.743 + if (!usertypofile) {
1.744 + /* we ain't got no user typo file! */
1.745 + printf(" --> I couldn't find gutcheck.typ "
1.746 + "-- proceeding without user typos.\n");
1.747 + }
1.748 + }
1.749 + usertypo_count=0;
1.750 + if (usertypofile)
1.751 + {
1.752 + /* we managed to open a User Typo File! */
1.753 + if (pswit[USERTYPO_SWITCH])
1.754 + {
1.755 + while (flgets(aline,LINEBUFSIZE-1,usertypofile,
1.756 + (long)usertypo_count))
1.757 + {
1.758 + if (strlen(aline)>1)
1.759 + {
1.760 + if ((int)*aline>33)
1.761 + {
1.762 + s=malloc(strlen(aline)+1);
1.763 + if (!s)
1.764 + {
1.765 + fprintf(stderr,"bookloupe: cannot get enough "
1.766 + "memory for user typo file!\n");
1.767 exit(1);
1.768 - }
1.769 - strcpy(s, aline);
1.770 - usertypo[usertypo_count] = s;
1.771 + }
1.772 + strcpy(s,aline);
1.773 + usertypo[usertypo_count]=s;
1.774 usertypo_count++;
1.775 - if (usertypo_count >= MAX_USER_TYPOS) {
1.776 - printf(" --> Only %d user-defined typos allowed: ignoring the rest\n");
1.777 + if (usertypo_count>=MAX_USER_TYPOS)
1.778 + {
1.779 + printf(" --> Only %d user-defined typos "
1.780 + "allowed: ignoring the rest\n");
1.781 break;
1.782 - }
1.783 - }
1.784 - }
1.785 - }
1.786 - }
1.787 + }
1.788 + }
1.789 + }
1.790 + }
1.791 + }
1.792 fclose(usertypofile);
1.793 - }
1.794 - }
1.795 -
1.796 -
1.797 -
1.798 -
1.799 - fprintf(stderr, "gutcheck: Check and report on an e-text\n");
1.800 -
1.801 - cnt_dquot = cnt_squot = cnt_brack = cnt_bin = cnt_odd = cnt_long =
1.802 - cnt_short = cnt_punct = cnt_dash = cnt_word = cnt_html = cnt_lineend =
1.803 - cnt_spacend = 0;
1.804 -
1.805 + }
1.806 + }
1.807 + fprintf(stderr,"bookloupe: Check and report on an e-text\n");
1.808 + cnt_dquot=cnt_squot=cnt_brack=cnt_bin=cnt_odd=cnt_long=
1.809 + cnt_short=cnt_punct=cnt_dash=cnt_word=cnt_html=cnt_lineend=
1.810 + cnt_spacend=0;
1.811 procfile(argv[0]);
1.812 -
1.813 - if (pswit[OVERVIEW_SWITCH]) {
1.814 - printf(" Checked %ld lines of %ld (head+foot = %ld)\n\n",
1.815 - checked_linecnt, linecnt, linecnt - checked_linecnt);
1.816 - printf(" --------------- Queries found --------------\n");
1.817 - if (cnt_long) printf(" Long lines: %5ld\n",cnt_long);
1.818 - if (cnt_short) printf(" Short lines: %5ld\n",cnt_short);
1.819 - if (cnt_lineend) printf(" Line-end problems: %5ld\n",cnt_lineend);
1.820 - if (cnt_word) printf(" Common typos: %5ld\n",cnt_word);
1.821 - if (cnt_dquot) printf(" Unmatched quotes: %5ld\n",cnt_dquot);
1.822 - if (cnt_squot) printf(" Unmatched SingleQuotes: %5ld\n",cnt_squot);
1.823 - if (cnt_brack) printf(" Unmatched brackets: %5ld\n",cnt_brack);
1.824 - if (cnt_bin) printf(" Non-ASCII characters: %5ld\n",cnt_bin);
1.825 - if (cnt_odd) printf(" Proofing characters: %5ld\n",cnt_odd);
1.826 - if (cnt_punct) printf(" Punctuation & spacing queries: %5ld\n",cnt_punct);
1.827 - if (cnt_dash) printf(" Non-standard dashes: %5ld\n",cnt_dash);
1.828 - if (cnt_html) printf(" Possible HTML tags: %5ld\n",cnt_html);
1.829 + if (pswit[OVERVIEW_SWITCH])
1.830 + {
1.831 + printf(" Checked %ld lines of %ld (head+foot = %ld)\n\n",
1.832 + checked_linecnt,linecnt,linecnt-checked_linecnt);
1.833 + printf(" --------------- Queries found --------------\n");
1.834 + if (cnt_long)
1.835 + printf(" Long lines: %14ld\n",cnt_long);
1.836 + if (cnt_short)
1.837 + printf(" Short lines: %14ld\n",cnt_short);
1.838 + if (cnt_lineend)
1.839 + printf(" Line-end problems: %14ld\n",cnt_lineend);
1.840 + if (cnt_word)
1.841 + printf(" Common typos: %14ld\n",cnt_word);
1.842 + if (cnt_dquot)
1.843 + printf(" Unmatched quotes: %14ld\n",cnt_dquot);
1.844 + if (cnt_squot)
1.845 + printf(" Unmatched SingleQuotes: %14ld\n",cnt_squot);
1.846 + if (cnt_brack)
1.847 + printf(" Unmatched brackets: %14ld\n",cnt_brack);
1.848 + if (cnt_bin)
1.849 + printf(" Non-ASCII characters: %14ld\n",cnt_bin);
1.850 + if (cnt_odd)
1.851 + printf(" Proofing characters: %14ld\n",cnt_odd);
1.852 + if (cnt_punct)
1.853 + printf(" Punctuation & spacing queries: %14ld\n",cnt_punct);
1.854 + if (cnt_dash)
1.855 + printf(" Non-standard dashes: %14ld\n",cnt_dash);
1.856 + if (cnt_html)
1.857 + printf(" Possible HTML tags: %14ld\n",cnt_html);
1.858 printf("\n");
1.859 - printf(" TOTAL QUERIES %5ld\n",
1.860 - cnt_dquot + cnt_squot + cnt_brack + cnt_bin + cnt_odd + cnt_long +
1.861 - cnt_short + cnt_punct + cnt_dash + cnt_word + cnt_html + cnt_lineend);
1.862 - }
1.863 -
1.864 - return(0);
1.865 + printf(" TOTAL QUERIES %14ld\n",
1.866 + cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+
1.867 + cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);
1.868 + }
1.869 + return 0;
1.870 }
1.871
1.872 -
1.873 -
1.874 -/* procfile - process one file */
1.875 -
1.876 +/*
1.877 + * procfile:
1.878 + *
1.879 + * Process one file.
1.880 + */
1.881 void procfile(char *filename)
1.882 {
1.883 -
1.884 - char *s, *t, *s1, laststart, *wordstart;
1.885 - char inword[MAXWORDLEN], testword[MAXWORDLEN];
1.886 + char *s,*t,*s1,laststart,*wordstart;
1.887 + char inword[MAXWORDLEN],testword[MAXWORDLEN];
1.888 char parastart[81]; /* first line of current para */
1.889 FILE *infile;
1.890 - long quot, squot, firstline, alphalen, totlen, binlen,
1.891 - shortline, longline, verylongline, spacedash, emdash,
1.892 - space_emdash, non_PG_space_emdash, PG_space_emdash,
1.893 - footerline, dotcomma, start_para_line, astline, fslashline,
1.894 - standalone_digit, hyphens, htmcount, endquote_count;
1.895 - long spline, nspline;
1.896 - signed int i, j, llen, isemptyline, isacro, isellipsis, istypo, alower,
1.897 - eNon_A, eTab, eTilde, eAst, eFSlash, eCarat;
1.898 - signed int warn_short, warn_long, warn_bin, warn_dash, warn_dotcomma,
1.899 - warn_ast, warn_fslash, warn_digit, warn_hyphen, warn_endquote;
1.900 - unsigned int lastlen, lastblen;
1.901 - signed int s_brack, c_brack, r_brack, c_unders;
1.902 - signed int open_single_quote, close_single_quote, guessquote, dquotepar, squotepar;
1.903 - signed int isnewpara, vowel, consonant;
1.904 - char dquote_err[80], squote_err[80], rbrack_err[80], sbrack_err[80], cbrack_err[80],
1.905 - unders_err[80];
1.906 - signed int qword_index, qperiod_index, isdup;
1.907 + long quot,squot,firstline,alphalen,totlen,binlen,
1.908 + shortline,longline,verylongline,spacedash,emdash,
1.909 + space_emdash,non_PG_space_emdash,PG_space_emdash,
1.910 + footerline,dotcomma,start_para_line,astline,fslashline,
1.911 + standalone_digit,hyphens,htmcount,endquote_count;
1.912 + long spline,nspline;
1.913 + signed int i,j,llen,isemptyline,isacro,isellipsis,istypo,alower,
1.914 + eNon_A,eTab,eTilde,eAst,eFSlash,eCarat;
1.915 + signed int warn_short,warn_long,warn_bin,warn_dash,warn_dotcomma,
1.916 + warn_ast,warn_fslash,warn_digit,warn_hyphen,warn_endquote;
1.917 + unsigned int lastlen,lastblen;
1.918 + signed int s_brack,c_brack,r_brack,c_unders;
1.919 + signed int open_single_quote,close_single_quote,guessquote,dquotepar,
1.920 + squotepar;
1.921 + signed int isnewpara,vowel,consonant;
1.922 + char dquote_err[80],squote_err[80],rbrack_err[80],sbrack_err[80],
1.923 + cbrack_err[80],unders_err[80];
1.924 + signed int qword_index,qperiod_index,isdup;
1.925 signed int enddash;
1.926 - signed int Dutchcount, isDutch, Frenchcount, isFrench;
1.927 -
1.928 -
1.929 -
1.930 -
1.931 -
1.932 - laststart = CHAR_SPACE;
1.933 - lastlen = lastblen = 0;
1.934 - *dquote_err = *squote_err = *rbrack_err = *cbrack_err = *sbrack_err =
1.935 - *unders_err = *prevline = 0;
1.936 - linecnt = firstline = alphalen = totlen = binlen =
1.937 - shortline = longline = spacedash = emdash = checked_linecnt =
1.938 - space_emdash = non_PG_space_emdash = PG_space_emdash =
1.939 - footerline = dotcomma = start_para_line = astline = fslashline =
1.940 - standalone_digit = hyphens = htmcount = endquote_count = 0;
1.941 - quot = squot = s_brack = c_brack = r_brack = c_unders = 0;
1.942 - i = llen = isemptyline = isacro = isellipsis = istypo = 0;
1.943 - warn_short = warn_long = warn_bin = warn_dash = warn_dotcomma =
1.944 - warn_ast = warn_fslash = warn_digit = warn_endquote = 0;
1.945 - isnewpara = vowel = consonant = enddash = 0;
1.946 - spline = nspline = 0;
1.947 - qword_index = qperiod_index = isdup = 0;
1.948 - *inword = *testword = 0;
1.949 - open_single_quote = close_single_quote = guessquote = dquotepar = squotepar = 0;
1.950 - Dutchcount = isDutch = Frenchcount = isFrench = 0;
1.951 -
1.952 -
1.953 - for (j = 0; j < MAX_QWORD; j++) {
1.954 - dupcnt[j] = 0;
1.955 - for (i = 0; i < MAX_QWORD_LENGTH; i++)
1.956 - qword[i][j] = 0;
1.957 - qperiod[i][j] = 0;
1.958 - }
1.959 -
1.960 -
1.961 - if ((infile = fopen(filename, "rb")) == NULL) {
1.962 + signed int Dutchcount,isDutch,Frenchcount,isFrench;
1.963 + laststart=CHAR_SPACE;
1.964 + lastlen=lastblen=0;
1.965 + *dquote_err=*squote_err=*rbrack_err=*cbrack_err=*sbrack_err=
1.966 + *unders_err=*prevline=0;
1.967 + linecnt=firstline=alphalen=totlen=binlen=
1.968 + shortline=longline=spacedash=emdash=checked_linecnt=
1.969 + space_emdash=non_PG_space_emdash=PG_space_emdash=
1.970 + footerline=dotcomma=start_para_line=astline=fslashline=
1.971 + standalone_digit=hyphens=htmcount=endquote_count=0;
1.972 + quot=squot=s_brack=c_brack=r_brack=c_unders=0;
1.973 + i=llen=isemptyline=isacro=isellipsis=istypo=0;
1.974 + warn_short=warn_long=warn_bin=warn_dash=warn_dotcomma=
1.975 + warn_ast=warn_fslash=warn_digit=warn_endquote=0;
1.976 + isnewpara=vowel=consonant=enddash=0;
1.977 + spline=nspline=0;
1.978 + qword_index=qperiod_index=isdup=0;
1.979 + *inword=*testword=0;
1.980 + open_single_quote=close_single_quote=guessquote=dquotepar=squotepar=0;
1.981 + Dutchcount=isDutch=Frenchcount=isFrench=0;
1.982 + for (j=0;j<MAX_QWORD;j++)
1.983 + {
1.984 + dupcnt[j]=0;
1.985 + for (i=0;i<MAX_QWORD_LENGTH;i++)
1.986 + {
1.987 + qword[i][j]=0;
1.988 + qperiod[i][j]=0;
1.989 + }
1.990 + }
1.991 + infile=fopen(filename,"rb");
1.992 + if (!infile)
1.993 + {
1.994 if (pswit[STDOUT_SWITCH])
1.995 - fprintf(stdout, "gutcheck: cannot open %s\n", filename);
1.996 + fprintf(stdout,"bookloupe: cannot open %s\n",filename);
1.997 else
1.998 - fprintf(stderr, "gutcheck: cannot open %s\n", filename);
1.999 - exit(1);
1.1000 - }
1.1001 -
1.1002 - fprintf(stdout, "\n\nFile: %s\n\n", filename);
1.1003 - firstline = shortline = longline = verylongline = 0;
1.1004 -
1.1005 -
1.1006 - /*****************************************************/
1.1007 - /* */
1.1008 - /* Run a first pass - verify that it's a valid PG */
1.1009 - /* file, decide whether to report some things that */
1.1010 - /* occur many times in the text like long or short */
1.1011 - /* lines, non-standard dashes, and other good stuff */
1.1012 - /* I'll doubtless think of later. */
1.1013 - /* */
1.1014 - /*****************************************************/
1.1015 -
1.1016 - /*****************************************************/
1.1017 - /* V.24 Sigh. Yet Another Header Change */
1.1018 - /*****************************************************/
1.1019 -
1.1020 - while (fgets(aline, LINEBUFSIZE-1, infile)) {
1.1021 - while (aline[strlen(aline)-1] == 10 || aline[strlen(aline)-1] == 13 ) aline[strlen(aline)-1] = 0;
1.1022 + fprintf(stderr,"bookloupe: cannot open %s\n",filename);
1.1023 + exit(1);
1.1024 + }
1.1025 + fprintf(stdout,"\n\nFile: %s\n\n",filename);
1.1026 + firstline=shortline=longline=verylongline=0;
1.1027 + /*
1.1028 + * Run a first pass - verify that it's a valid PG
1.1029 + * file, decide whether to report some things that
1.1030 + * occur many times in the text like long or short
1.1031 + * lines, non-standard dashes, etc.
1.1032 + */
1.1033 + while (fgets(aline,LINEBUFSIZE-1,infile))
1.1034 + {
1.1035 + while (aline[strlen(aline)-1]==10 || aline[strlen(aline)-1]==13)
1.1036 + aline[strlen(aline)-1]=0;
1.1037 linecnt++;
1.1038 - if (strstr(aline, "*END") && strstr(aline, "SMALL PRINT") && (strstr(aline, "PUBLIC DOMAIN") || strstr(aline, "COPYRIGHT"))) {
1.1039 + if (strstr(aline,"*END") && strstr(aline,"SMALL PRINT") &&
1.1040 + (strstr(aline,"PUBLIC DOMAIN") || strstr(aline,"COPYRIGHT")))
1.1041 + {
1.1042 if (spline)
1.1043 printf(" --> Duplicate header?\n");
1.1044 - spline = linecnt + 1; /* first line of non-header text, that is */
1.1045 - }
1.1046 - if (!strncmp(aline, "*** START", 9) && strstr(aline, "PROJECT GUTENBERG")) {
1.1047 + spline=linecnt+1; /* first line of non-header text, that is */
1.1048 + }
1.1049 + if (!strncmp(aline,"*** START",9) && strstr(aline,"PROJECT GUTENBERG"))
1.1050 + {
1.1051 if (nspline)
1.1052 printf(" --> Duplicate header?\n");
1.1053 - nspline = linecnt + 1; /* first line of non-header text, that is */
1.1054 - }
1.1055 - if (spline || nspline) {
1.1056 + nspline=linecnt+1; /* first line of non-header text, that is */
1.1057 + }
1.1058 + if (spline || nspline)
1.1059 + {
1.1060 lowerit(aline);
1.1061 - if (strstr(aline, "end") && strstr(aline, "project gutenberg")) {
1.1062 - if (strstr(aline, "end") < strstr(aline, "project gutenberg")) {
1.1063 - if (footerline) {
1.1064 - if (!nspline) /* it's an old-form header - we can detect duplicates */
1.1065 + if (strstr(aline,"end") && strstr(aline,"project gutenberg"))
1.1066 + {
1.1067 + if (strstr(aline,"end")<strstr(aline,"project gutenberg"))
1.1068 + {
1.1069 + if (footerline)
1.1070 + {
1.1071 + /* it's an old-form header - we can detect duplicates */
1.1072 + if (!nspline)
1.1073 printf(" --> Duplicate footer?\n");
1.1074 - else
1.1075 - ;
1.1076 - }
1.1077 - else {
1.1078 - footerline = linecnt;
1.1079 - }
1.1080 - }
1.1081 - }
1.1082 - }
1.1083 - if (spline) firstline = spline;
1.1084 - if (nspline) firstline = nspline; /* override with new */
1.1085 -
1.1086 - if (footerline) continue; /* 0.99+ don't count the boilerplate in the footer */
1.1087 -
1.1088 - llen = strlen(aline);
1.1089 - totlen += llen;
1.1090 - for (i = 0; i < llen; i++) {
1.1091 - if ((unsigned char)aline[i] > 127) binlen++;
1.1092 - if (gcisalpha(aline[i])) alphalen++;
1.1093 - if (i > 0)
1.1094 - if (aline[i] == CHAR_DQUOTE && isalpha(aline[i-1]))
1.1095 - endquote_count++;
1.1096 - }
1.1097 - if (strlen(aline) > 2
1.1098 - && lastlen > 2 && lastlen < SHORTEST_PG_LINE
1.1099 - && lastblen > 2 && lastblen > SHORTEST_PG_LINE
1.1100 - && laststart != CHAR_SPACE)
1.1101 - shortline++;
1.1102 -
1.1103 - if (*aline) /* fixed line below for 0.96 */
1.1104 - if ((unsigned char)aline[strlen(aline)-1] <= CHAR_SPACE) cnt_spacend++;
1.1105 -
1.1106 - if (strstr(aline, ".,")) dotcomma++;
1.1107 - /* 0.98 only count ast lines for ignoring purposes where there is */
1.1108 + }
1.1109 + else
1.1110 + footerline=linecnt;
1.1111 + }
1.1112 + }
1.1113 + }
1.1114 + if (spline)
1.1115 + firstline=spline;
1.1116 + if (nspline)
1.1117 + firstline=nspline; /* override with new */
1.1118 + if (footerline)
1.1119 + continue; /* don't count the boilerplate in the footer */
1.1120 + llen=strlen(aline);
1.1121 + totlen+=llen;
1.1122 + for (i=0;i<llen;i++)
1.1123 + {
1.1124 + if ((unsigned char)aline[i]>127)
1.1125 + binlen++;
1.1126 + if (gcisalpha(aline[i]))
1.1127 + alphalen++;
1.1128 + if (i>0 && aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))
1.1129 + endquote_count++;
1.1130 + }
1.1131 + if (strlen(aline)>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE &&
1.1132 + lastblen>2 && lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)
1.1133 + shortline++;
1.1134 + if (*aline && (unsigned char)aline[strlen(aline)-1]<=CHAR_SPACE)
1.1135 + cnt_spacend++;
1.1136 + if (strstr(aline,".,"))
1.1137 + dotcomma++;
1.1138 + /* only count ast lines for ignoring purposes where there is */
1.1139 /* locase text on the line */
1.1140 - if (strstr(aline, "*")) {
1.1141 - for (s = aline; *s; s++)
1.1142 - if (*s >='a' && *s <= 'z')
1.1143 + if (strstr(aline,"*"))
1.1144 + {
1.1145 + for (s=aline;*s;s++)
1.1146 + if (*s>='a' && *s<='z')
1.1147 break;
1.1148 - if (*s) astline++;
1.1149 - }
1.1150 - if (strstr(aline, "/"))
1.1151 + if (*s)
1.1152 + astline++;
1.1153 + }
1.1154 + if (strstr(aline,"/"))
1.1155 fslashline++;
1.1156 - for (i = llen-1; i > 0 && (unsigned char)aline[i] <= CHAR_SPACE; i--);
1.1157 - if (aline[i] == '-' && aline[i-1] != '-') hyphens++;
1.1158 -
1.1159 - if (llen > LONGEST_PG_LINE) longline++;
1.1160 - if (llen > WAY_TOO_LONG) verylongline++;
1.1161 -
1.1162 - if (strstr(aline, "<") && strstr(aline, ">")) {
1.1163 - i = (signed int) (strstr(aline, ">") - strstr(aline, "<") + 1);
1.1164 - if (i > 0)
1.1165 + for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)
1.1166 + ;
1.1167 + if (aline[i]=='-' && aline[i-1]!='-')
1.1168 + hyphens++;
1.1169 + if (llen>LONGEST_PG_LINE)
1.1170 + longline++;
1.1171 + if (llen>WAY_TOO_LONG)
1.1172 + verylongline++;
1.1173 + if (strstr(aline,"<") && strstr(aline,">"))
1.1174 + {
1.1175 + i=(signed int)(strstr(aline,">")-strstr(aline,"<")+1);
1.1176 + if (i>0)
1.1177 htmcount++;
1.1178 - if (strstr(aline, "<i>")) htmcount +=4; /* bonus marks! */
1.1179 - }
1.1180 -
1.1181 + if (strstr(aline,"<i>"))
1.1182 + htmcount+=4; /* bonus marks! */
1.1183 + }
1.1184 /* Check for spaced em-dashes */
1.1185 - if (strstr(aline,"--")) {
1.1186 + if (strstr(aline,"--"))
1.1187 + {
1.1188 emdash++;
1.1189 - if (*(strstr(aline, "--")-1) == CHAR_SPACE ||
1.1190 - (*(strstr(aline, "--")+2) == CHAR_SPACE))
1.1191 - space_emdash++;
1.1192 - if (*(strstr(aline, "--")-1) == CHAR_SPACE &&
1.1193 - (*(strstr(aline, "--")+2) == CHAR_SPACE))
1.1194 - non_PG_space_emdash++; /* count of em-dashes with spaces both sides */
1.1195 - if (*(strstr(aline, "--")-1) != CHAR_SPACE &&
1.1196 - (*(strstr(aline, "--")+2) != CHAR_SPACE))
1.1197 - PG_space_emdash++; /* count of PG-type em-dashes with no spaces */
1.1198 - }
1.1199 -
1.1200 - for (s = aline; *s;) {
1.1201 - s = getaword(s, inword);
1.1202 - if (!strcmp(inword, "hij") || !strcmp(inword, "niet"))
1.1203 + if (*(strstr(aline,"--")-1)==CHAR_SPACE ||
1.1204 + (*(strstr(aline,"--")+2)==CHAR_SPACE))
1.1205 + space_emdash++;
1.1206 + if (*(strstr(aline,"--")-1)==CHAR_SPACE &&
1.1207 + (*(strstr(aline,"--")+2)==CHAR_SPACE))
1.1208 + /* count of em-dashes with spaces both sides */
1.1209 + non_PG_space_emdash++;
1.1210 + if (*(strstr(aline,"--")-1)!=CHAR_SPACE &&
1.1211 + (*(strstr(aline,"--")+2)!=CHAR_SPACE))
1.1212 + /* count of PG-type em-dashes with no spaces */
1.1213 + PG_space_emdash++;
1.1214 + }
1.1215 + for (s=aline;*s;)
1.1216 + {
1.1217 + s=getaword(s,inword);
1.1218 + if (!strcmp(inword,"hij") || !strcmp(inword,"niet"))
1.1219 Dutchcount++;
1.1220 - if (!strcmp(inword, "dans") || !strcmp(inword, "avec"))
1.1221 + if (!strcmp(inword,"dans") || !strcmp(inword,"avec"))
1.1222 Frenchcount++;
1.1223 - if (!strcmp(inword, "0") || !strcmp(inword, "1"))
1.1224 + if (!strcmp(inword,"0") || !strcmp(inword,"1"))
1.1225 standalone_digit++;
1.1226 - }
1.1227 -
1.1228 + }
1.1229 /* Check for spaced dashes */
1.1230 - if (strstr(aline," -"))
1.1231 - if (*(strstr(aline, " -")+2) != '-')
1.1232 - spacedash++;
1.1233 - lastblen = lastlen;
1.1234 - lastlen = strlen(aline);
1.1235 - laststart = aline[0];
1.1236 -
1.1237 - }
1.1238 + if (strstr(aline," -") && *(strstr(aline," -")+2)!='-')
1.1239 + spacedash++;
1.1240 + lastblen=lastlen;
1.1241 + lastlen=strlen(aline);
1.1242 + laststart=aline[0];
1.1243 + }
1.1244 fclose(infile);
1.1245 -
1.1246 -
1.1247 /* now, based on this quick view, make some snap decisions */
1.1248 - if (cnt_spacend > 0) {
1.1249 - printf(" --> %ld lines in this file have white space at end\n", cnt_spacend);
1.1250 - }
1.1251 -
1.1252 - warn_dotcomma = 1;
1.1253 - if (dotcomma > 5) {
1.1254 - warn_dotcomma = 0;
1.1255 - printf(" --> %ld lines in this file contain '.,'. Not reporting them.\n", dotcomma);
1.1256 - }
1.1257 -
1.1258 - /* if more than 50 lines, or one-tenth, are short, don't bother reporting them */
1.1259 - warn_short = 1;
1.1260 - if (shortline > 50 || shortline * 10 > linecnt) {
1.1261 - warn_short = 0;
1.1262 - printf(" --> %ld lines in this file are short. Not reporting short lines.\n", shortline);
1.1263 - }
1.1264 -
1.1265 - /* if more than 50 lines, or one-tenth, are long, don't bother reporting them */
1.1266 - warn_long = 1;
1.1267 - if (longline > 50 || longline * 10 > linecnt) {
1.1268 - warn_long = 0;
1.1269 - printf(" --> %ld lines in this file are long. Not reporting long lines.\n", longline);
1.1270 - }
1.1271 -
1.1272 - /* if more than 10 lines contain asterisks, don't bother reporting them V.0.97 */
1.1273 - warn_ast = 1;
1.1274 - if (astline > 10 ) {
1.1275 - warn_ast = 0;
1.1276 - printf(" --> %ld lines in this file contain asterisks. Not reporting them.\n", astline);
1.1277 - }
1.1278 -
1.1279 - /* if more than 10 lines contain forward slashes, don't bother reporting them V.0.99 */
1.1280 - warn_fslash = 1;
1.1281 - if (fslashline > 10 ) {
1.1282 - warn_fslash = 0;
1.1283 - printf(" --> %ld lines in this file contain forward slashes. Not reporting them.\n", fslashline);
1.1284 - }
1.1285 -
1.1286 - /* if more than 20 lines contain unpunctuated endquotes, don't bother reporting them V.0.99 */
1.1287 - warn_endquote = 1;
1.1288 - if (endquote_count > 20 ) {
1.1289 - warn_endquote = 0;
1.1290 - printf(" --> %ld lines in this file contain unpunctuated endquotes. Not reporting them.\n", endquote_count);
1.1291 - }
1.1292 -
1.1293 - /* if more than 15 lines contain standalone digits, don't bother reporting them V.0.97 */
1.1294 - warn_digit = 1;
1.1295 - if (standalone_digit > 10 ) {
1.1296 - warn_digit = 0;
1.1297 - printf(" --> %ld lines in this file contain standalone 0s and 1s. Not reporting them.\n", standalone_digit);
1.1298 - }
1.1299 -
1.1300 - /* if more than 20 lines contain hyphens at end, don't bother reporting them V.0.98 */
1.1301 - warn_hyphen = 1;
1.1302 - if (hyphens > 20 ) {
1.1303 - warn_hyphen = 0;
1.1304 - printf(" --> %ld lines in this file have hyphens at end. Not reporting them.\n", hyphens);
1.1305 - }
1.1306 -
1.1307 - if (htmcount > 20 && !pswit[MARKUP_SWITCH]) {
1.1308 + if (cnt_spacend>0)
1.1309 + printf(" --> %ld lines in this file have white space at end\n",
1.1310 + cnt_spacend);
1.1311 + warn_dotcomma=1;
1.1312 + if (dotcomma>5)
1.1313 + {
1.1314 + warn_dotcomma=0;
1.1315 + printf(" --> %ld lines in this file contain '.,'. "
1.1316 + "Not reporting them.\n",dotcomma);
1.1317 + }
1.1318 + /* if more than 50 lines, or one-tenth, are short,
1.1319 + * don't bother reporting them */
1.1320 + warn_short=1;
1.1321 + if (shortline>50 || shortline*10>linecnt)
1.1322 + {
1.1323 + warn_short=0;
1.1324 + printf(" --> %ld lines in this file are short. "
1.1325 + "Not reporting short lines.\n",shortline);
1.1326 + }
1.1327 + /*
1.1328 + * If more than 50 lines, or one-tenth, are long,
1.1329 + * don't bother reporting them.
1.1330 + */
1.1331 + warn_long=1;
1.1332 + if (longline>50 || longline*10>linecnt)
1.1333 + {
1.1334 + warn_long=0;
1.1335 + printf(" --> %ld lines in this file are long. "
1.1336 + "Not reporting long lines.\n",longline);
1.1337 + }
1.1338 + /* If more than 10 lines contain asterisks, don't bother reporting them. */
1.1339 + warn_ast=1;
1.1340 + if (astline>10)
1.1341 + {
1.1342 + warn_ast=0;
1.1343 + printf(" --> %ld lines in this file contain asterisks. "
1.1344 + "Not reporting them.\n",astline);
1.1345 + }
1.1346 + /*
1.1347 + * If more than 10 lines contain forward slashes,
1.1348 + * don't bother reporting them.
1.1349 + */
1.1350 + warn_fslash=1;
1.1351 + if (fslashline>10)
1.1352 + {
1.1353 + warn_fslash=0;
1.1354 + printf(" --> %ld lines in this file contain forward slashes. "
1.1355 + "Not reporting them.\n",fslashline);
1.1356 + }
1.1357 + /*
1.1358 + * If more than 20 lines contain unpunctuated endquotes,
1.1359 + * don't bother reporting them.
1.1360 + */
1.1361 + warn_endquote=1;
1.1362 + if (endquote_count>20)
1.1363 + {
1.1364 + warn_endquote=0;
1.1365 + printf(" --> %ld lines in this file contain unpunctuated endquotes. "
1.1366 + "Not reporting them.\n",endquote_count);
1.1367 + }
1.1368 + /*
1.1369 + * If more than 10 lines contain standalone digits,
1.1370 + * don't bother reporting them.
1.1371 + */
1.1372 + warn_digit=1;
1.1373 + if (standalone_digit>10)
1.1374 + {
1.1375 + warn_digit=0;
1.1376 + printf(" --> %ld lines in this file contain standalone 0s and 1s. "
1.1377 + "Not reporting them.\n",standalone_digit);
1.1378 + }
1.1379 + /*
1.1380 + * If more than 20 lines contain hyphens at end,
1.1381 + * don't bother reporting them.
1.1382 + */
1.1383 + warn_hyphen=1;
1.1384 + if (hyphens>20)
1.1385 + {
1.1386 + warn_hyphen=0;
1.1387 + printf(" --> %ld lines in this file have hyphens at end. "
1.1388 + "Not reporting them.\n",hyphens);
1.1389 + }
1.1390 + if (htmcount>20 && !pswit[MARKUP_SWITCH])
1.1391 + {
1.1392 printf(" --> Looks like this is HTML. Switching HTML mode ON.\n");
1.1393 - pswit[MARKUP_SWITCH] = 1;
1.1394 - }
1.1395 -
1.1396 - if (verylongline > 0) {
1.1397 - printf(" --> %ld lines in this file are VERY long!\n", verylongline);
1.1398 - }
1.1399 -
1.1400 - /* If there are more non-PG spaced dashes than PG em-dashes, */
1.1401 - /* assume it's deliberate */
1.1402 - /* Current PG guidelines say don't use them, but older texts do,*/
1.1403 - /* and some people insist on them whatever the guidelines say. */
1.1404 - /* V.20 removed requirement that PG_space_emdash be greater than*/
1.1405 - /* ten before turning off warnings about spaced dashes. */
1.1406 - warn_dash = 1;
1.1407 - if (spacedash + non_PG_space_emdash > PG_space_emdash) {
1.1408 - warn_dash = 0;
1.1409 - printf(" --> There are %ld spaced dashes and em-dashes. Not reporting them.\n", spacedash + non_PG_space_emdash);
1.1410 - }
1.1411 -
1.1412 - /* if more than a quarter of characters are hi-bit, bug out */
1.1413 - warn_bin = 1;
1.1414 - if (binlen * 4 > totlen) {
1.1415 - printf(" --> This file does not appear to be ASCII. Terminating. Best of luck with it!\n");
1.1416 + pswit[MARKUP_SWITCH]=1;
1.1417 + }
1.1418 + if (verylongline>0)
1.1419 + printf(" --> %ld lines in this file are VERY long!\n",verylongline);
1.1420 + /*
1.1421 + * If there are more non-PG spaced dashes than PG em-dashes,
1.1422 + * assume it's deliberate.
1.1423 + * Current PG guidelines say don't use them, but older texts do,
1.1424 + * and some people insist on them whatever the guidelines say.
1.1425 + */
1.1426 + warn_dash=1;
1.1427 + if (spacedash+non_PG_space_emdash>PG_space_emdash)
1.1428 + {
1.1429 + warn_dash=0;
1.1430 + printf(" --> There are %ld spaced dashes and em-dashes. "
1.1431 + "Not reporting them.\n",spacedash+non_PG_space_emdash);
1.1432 + }
1.1433 + /* If more than a quarter of characters are hi-bit, bug out. */
1.1434 + warn_bin=1;
1.1435 + if (binlen*4>totlen)
1.1436 + {
1.1437 + printf(" --> This file does not appear to be ASCII. "
1.1438 + "Terminating. Best of luck with it!\n");
1.1439 exit(1);
1.1440 - }
1.1441 - if (alphalen * 4 < totlen) {
1.1442 - printf(" --> This file does not appear to be text. Terminating. Best of luck with it!\n");
1.1443 + }
1.1444 + if (alphalen*4<totlen)
1.1445 + {
1.1446 + printf(" --> This file does not appear to be text. "
1.1447 + "Terminating. Best of luck with it!\n");
1.1448 exit(1);
1.1449 - }
1.1450 - if ((binlen * 100 > totlen) || (binlen > 100)) {
1.1451 - printf(" --> There are a lot of foreign letters here. Not reporting them.\n");
1.1452 - warn_bin = 0;
1.1453 - }
1.1454 -
1.1455 - /* isDutch and isFrench added .991 Feb 06 for Frank, Jeroen, Renald */
1.1456 - isDutch = 0;
1.1457 - if (Dutchcount > 50) {
1.1458 - isDutch = 1;
1.1459 - printf(" --> This looks like Dutch - switching off dashes and warnings for 's Middags case.\n");
1.1460 - }
1.1461 -
1.1462 - isFrench = 0;
1.1463 - if (Frenchcount > 50) {
1.1464 - isFrench = 1;
1.1465 - printf(" --> This looks like French - switching off some doublepunct.\n");
1.1466 - }
1.1467 -
1.1468 + }
1.1469 + if (binlen*100>totlen || binlen>100)
1.1470 + {
1.1471 + printf(" --> There are a lot of foreign letters here. "
1.1472 + "Not reporting them.\n");
1.1473 + warn_bin=0;
1.1474 + }
1.1475 + isDutch=0;
1.1476 + if (Dutchcount>50)
1.1477 + {
1.1478 + isDutch=1;
1.1479 + printf(" --> This looks like Dutch - "
1.1480 + "switching off dashes and warnings for 's Middags case.\n");
1.1481 + }
1.1482 + isFrench=0;
1.1483 + if (Frenchcount>50)
1.1484 + {
1.1485 + isFrench=1;
1.1486 + printf(" --> This looks like French - "
1.1487 + "switching off some doublepunct.\n");
1.1488 + }
1.1489 if (firstline && footerline)
1.1490 printf(" The PG header and footer appear to be already on.\n");
1.1491 - else {
1.1492 + else
1.1493 + {
1.1494 if (firstline)
1.1495 printf(" The PG header is on - no footer.\n");
1.1496 if (footerline)
1.1497 printf(" The PG footer is on - no header.\n");
1.1498 - }
1.1499 + }
1.1500 printf("\n");
1.1501 -
1.1502 - /* V.22 George Davis asked for an override switch to force it to list everything */
1.1503 - if (pswit[VERBOSE_SWITCH]) {
1.1504 - warn_bin = 1;
1.1505 - warn_short = 1;
1.1506 - warn_dotcomma = 1;
1.1507 - warn_long = 1;
1.1508 - warn_dash = 1;
1.1509 - warn_digit = 1;
1.1510 - warn_ast = 1;
1.1511 - warn_fslash = 1;
1.1512 - warn_hyphen = 1;
1.1513 - warn_endquote = 1;
1.1514 + if (pswit[VERBOSE_SWITCH])
1.1515 + {
1.1516 + warn_bin=1;
1.1517 + warn_short=1;
1.1518 + warn_dotcomma=1;
1.1519 + warn_long=1;
1.1520 + warn_dash=1;
1.1521 + warn_digit=1;
1.1522 + warn_ast=1;
1.1523 + warn_fslash=1;
1.1524 + warn_hyphen=1;
1.1525 + warn_endquote=1;
1.1526 printf(" *** Verbose output is ON -- you asked for it! ***\n");
1.1527 - }
1.1528 -
1.1529 + }
1.1530 if (isDutch)
1.1531 - warn_dash = 0; /* Frank suggested turning it REALLY off for Dutch */
1.1532 -
1.1533 - if ((infile = fopen(filename, "rb")) == NULL) {
1.1534 + warn_dash=0;
1.1535 + infile=fopen(filename,"rb");
1.1536 + if (!infile)
1.1537 + {
1.1538 if (pswit[STDOUT_SWITCH])
1.1539 - fprintf(stdout, "gutcheck: cannot open %s\n", filename);
1.1540 + fprintf(stdout,"bookloupe: cannot open %s\n",filename);
1.1541 else
1.1542 - fprintf(stderr, "gutcheck: cannot open %s\n", filename);
1.1543 - exit(1);
1.1544 - }
1.1545 -
1.1546 - if (footerline > 0 && firstline > 0 && footerline > firstline && footerline - firstline < 100) { /* ugh */
1.1547 + fprintf(stderr,"bookloupe: cannot open %s\n",filename);
1.1548 + exit(1);
1.1549 + }
1.1550 + if (footerline>0 && firstline>0 && footerline>firstline &&
1.1551 + footerline-firstline<100)
1.1552 + {
1.1553 printf(" --> I don't really know where this text starts. \n");
1.1554 printf(" There are no reference points.\n");
1.1555 - printf(" I'm going to have to report the header and footer as well.\n");
1.1556 + printf(" I'm going to have to report the header and footer "
1.1557 + "as well.\n");
1.1558 firstline=0;
1.1559 - }
1.1560 -
1.1561 -
1.1562 -
1.1563 - /*****************************************************/
1.1564 - /* */
1.1565 - /* Here we go with the main pass. Hold onto yer hat! */
1.1566 - /* */
1.1567 - /*****************************************************/
1.1568 -
1.1569 - /* Re-init some variables we've dirtied */
1.1570 - quot = squot = linecnt = 0;
1.1571 - laststart = CHAR_SPACE;
1.1572 - lastlen = lastblen = 0;
1.1573 -
1.1574 - while (flgets(aline, LINEBUFSIZE-1, infile, linecnt+1)) {
1.1575 + }
1.1576 + /*
1.1577 + * Here we go with the main pass. Hold onto yer hat!
1.1578 + * Re-init some variables we've dirtied.
1.1579 + */
1.1580 + quot=squot=linecnt=0;
1.1581 + laststart=CHAR_SPACE;
1.1582 + lastlen=lastblen=0;
1.1583 + while (flgets(aline,LINEBUFSIZE-1,infile,linecnt+1))
1.1584 + {
1.1585 linecnt++;
1.1586 - if (linecnt == 1) isnewpara = 1;
1.1587 - if (pswit[DP_SWITCH])
1.1588 - if (!strncmp(aline, "-----File: ", 11))
1.1589 - continue; // skip DP page separators completely
1.1590 - if (linecnt < firstline || (footerline > 0 && linecnt > footerline)) {
1.1591 - if (pswit[HEADER_SWITCH]) {
1.1592 - if (!strncmp(aline, "Title:", 6))
1.1593 - printf(" %s\n", aline);
1.1594 - if (!strncmp (aline, "Author:", 7))
1.1595 - printf(" %s\n", aline);
1.1596 - if (!strncmp(aline, "Release Date:", 13))
1.1597 - printf(" %s\n", aline);
1.1598 - if (!strncmp(aline, "Edition:", 8))
1.1599 - printf(" %s\n\n", aline);
1.1600 - }
1.1601 + if (linecnt==1)
1.1602 + isnewpara=1;
1.1603 + if (pswit[DP_SWITCH] && !strncmp(aline,"-----File: ",11))
1.1604 + continue; // skip DP page separators completely
1.1605 + if (linecnt<firstline || (footerline>0 && linecnt>footerline))
1.1606 + {
1.1607 + if (pswit[HEADER_SWITCH])
1.1608 + {
1.1609 + if (!strncmp(aline,"Title:",6))
1.1610 + printf(" %s\n",aline);
1.1611 + if (!strncmp(aline,"Author:",7))
1.1612 + printf(" %s\n",aline);
1.1613 + if (!strncmp(aline,"Release Date:",13))
1.1614 + printf(" %s\n",aline);
1.1615 + if (!strncmp(aline,"Edition:",8))
1.1616 + printf(" %s\n\n",aline);
1.1617 + }
1.1618 continue; /* skip through the header */
1.1619 - }
1.1620 + }
1.1621 checked_linecnt++;
1.1622 - s = aline;
1.1623 - isemptyline = 1; /* assume the line is empty until proven otherwise */
1.1624 -
1.1625 - /* If we are in a state of unbalanced quotes, and this line */
1.1626 - /* doesn't begin with a quote, output the stored error message */
1.1627 - /* If the -P switch was used, print the warning even if the */
1.1628 - /* new para starts with quotes */
1.1629 - /* Version .20 - if the new paragraph does start with a quote, */
1.1630 - /* but is indented, I was giving a spurious error. Need to */
1.1631 - /* check the first _non-space_ character on the line rather */
1.1632 - /* than the first character when deciding whether the para */
1.1633 - /* starts with a quote. Using *t for this. */
1.1634 - t = s;
1.1635 - while (*t == ' ') t++;
1.1636 + s=aline;
1.1637 + isemptyline=1; /* assume the line is empty until proven otherwise */
1.1638 + /*
1.1639 + * If we are in a state of unbalanced quotes, and this line
1.1640 + * doesn't begin with a quote, output the stored error message.
1.1641 + * If the -P switch was used, print the warning even if the
1.1642 + * new para starts with quotes.
1.1643 + */
1.1644 + t=s;
1.1645 + while (*t==' ')
1.1646 + t++;
1.1647 if (*dquote_err)
1.1648 - if (*t != CHAR_DQUOTE || pswit[QPARA_SWITCH]) {
1.1649 - if (!pswit[OVERVIEW_SWITCH]) {
1.1650 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
1.1651 + if (*t!=CHAR_DQUOTE || pswit[QPARA_SWITCH])
1.1652 + {
1.1653 + if (!pswit[OVERVIEW_SWITCH])
1.1654 + {
1.1655 + if (pswit[ECHO_SWITCH])
1.1656 + printf("\n%s\n",parastart);
1.1657 printf(dquote_err);
1.1658 - }
1.1659 + }
1.1660 else
1.1661 cnt_dquot++;
1.1662 }
1.1663 - if (*squote_err) {
1.1664 - if (*t != CHAR_SQUOTE && *t != CHAR_OPEN_SQUOTE || pswit[QPARA_SWITCH] || squot) {
1.1665 - if (!pswit[OVERVIEW_SWITCH]) {
1.1666 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
1.1667 + if (*squote_err)
1.1668 + {
1.1669 + if (*t!=CHAR_SQUOTE && *t!=CHAR_OPEN_SQUOTE ||
1.1670 + pswit[QPARA_SWITCH] || squot)
1.1671 + {
1.1672 + if (!pswit[OVERVIEW_SWITCH])
1.1673 + {
1.1674 + if (pswit[ECHO_SWITCH])
1.1675 + printf("\n%s\n",parastart);
1.1676 printf(squote_err);
1.1677 - }
1.1678 + }
1.1679 else
1.1680 cnt_squot++;
1.1681 - }
1.1682 - squot = 0;
1.1683 - }
1.1684 - if (*rbrack_err) {
1.1685 - if (!pswit[OVERVIEW_SWITCH]) {
1.1686 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
1.1687 + }
1.1688 + squot=0;
1.1689 + }
1.1690 + if (*rbrack_err)
1.1691 + {
1.1692 + if (!pswit[OVERVIEW_SWITCH])
1.1693 + {
1.1694 + if (pswit[ECHO_SWITCH])
1.1695 + printf("\n%s\n",parastart);
1.1696 printf(rbrack_err);
1.1697 - }
1.1698 + }
1.1699 else
1.1700 cnt_brack++;
1.1701 - }
1.1702 - if (*sbrack_err) {
1.1703 - if (!pswit[OVERVIEW_SWITCH]) {
1.1704 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
1.1705 + }
1.1706 + if (*sbrack_err)
1.1707 + {
1.1708 + if (!pswit[OVERVIEW_SWITCH])
1.1709 + {
1.1710 + if (pswit[ECHO_SWITCH])
1.1711 + printf("\n%s\n",parastart);
1.1712 printf(sbrack_err);
1.1713 - }
1.1714 + }
1.1715 else
1.1716 cnt_brack++;
1.1717 - }
1.1718 - if (*cbrack_err) {
1.1719 - if (!pswit[OVERVIEW_SWITCH]) {
1.1720 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
1.1721 + }
1.1722 + if (*cbrack_err)
1.1723 + {
1.1724 + if (!pswit[OVERVIEW_SWITCH])
1.1725 + {
1.1726 + if (pswit[ECHO_SWITCH])
1.1727 + printf("\n%s\n",parastart);
1.1728 printf(cbrack_err);
1.1729 - }
1.1730 + }
1.1731 else
1.1732 cnt_brack++;
1.1733 - }
1.1734 - if (*unders_err) {
1.1735 - if (!pswit[OVERVIEW_SWITCH]) {
1.1736 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
1.1737 + }
1.1738 + if (*unders_err)
1.1739 + {
1.1740 + if (!pswit[OVERVIEW_SWITCH])
1.1741 + {
1.1742 + if (pswit[ECHO_SWITCH])
1.1743 + printf("\n%s\n",parastart);
1.1744 printf(unders_err);
1.1745 - }
1.1746 + }
1.1747 else
1.1748 cnt_brack++;
1.1749 - }
1.1750 -
1.1751 - *dquote_err = *squote_err = *rbrack_err = *cbrack_err =
1.1752 - *sbrack_err = *unders_err = 0;
1.1753 -
1.1754 -
1.1755 - /* look along the line, accumulate the count of quotes, and see */
1.1756 - /* if this is an empty line - i.e. a line with nothing on it */
1.1757 - /* but spaces. */
1.1758 - /* V .12 also if line has just spaces, * and/or - on it, don't */
1.1759 - /* count it, since empty lines with asterisks or dashes to */
1.1760 - /* separate sections are common. */
1.1761 - /* V .15 new single-quote checking - has to be better than the */
1.1762 - /* previous version, but how much better? fingers crossed! */
1.1763 - /* V .20 add period to * and - as characters on a separator line*/
1.1764 - s = aline;
1.1765 - while (*s) {
1.1766 - if (*s == CHAR_DQUOTE) quot++;
1.1767 - if (*s == CHAR_SQUOTE || *s == CHAR_OPEN_SQUOTE)
1.1768 - if (s == aline) { /* at start of line, it can only be an openquote */
1.1769 - if (strncmp(s+2, "tis", 3) && strncmp(s+2, "Tis", 3)) /* hardcode a very common exception! */
1.1770 + }
1.1771 + *dquote_err=*squote_err=*rbrack_err=*cbrack_err=
1.1772 + *sbrack_err=*unders_err=0;
1.1773 + /*
1.1774 + * Look along the line, accumulate the count of quotes, and see
1.1775 + * if this is an empty line - i.e. a line with nothing on it
1.1776 + * but spaces.
1.1777 + * If line has just spaces, period, * and/or - on it, don't
1.1778 + * count it, since empty lines with asterisks or dashes to
1.1779 + * separate sections are common.
1.1780 + */
1.1781 + s=aline;
1.1782 + while (*s)
1.1783 + {
1.1784 + if (*s==CHAR_DQUOTE)
1.1785 + quot++;
1.1786 + if (*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE)
1.1787 + {
1.1788 + if (s==aline)
1.1789 + {
1.1790 + /*
1.1791 + * At start of line, it can only be an openquote.
1.1792 + * Hardcode a very common exception!
1.1793 + */
1.1794 + if (strncmp(s+2,"tis",3) && strncmp(s+2,"Tis",3))
1.1795 open_single_quote++;
1.1796 - }
1.1797 - else
1.1798 - if (gcisalpha(*(s-1)) && gcisalpha(*(s+1)))
1.1799 - ; /* do nothing! - it's definitely an apostrophe, not a quote */
1.1800 - else /* it's outside a word - let's check it out */
1.1801 - if (*s == CHAR_OPEN_SQUOTE || gcisalpha(*(s+1))) { /* it damwell better BE an openquote */
1.1802 - if (strncmp(s+1, "tis", 3) && strncmp(s+1, "Tis", 3)) /* hardcode a very common exception! */
1.1803 - open_single_quote++;
1.1804 - }
1.1805 - else { /* now - is it a closequote? */
1.1806 - guessquote = 0; /* accumulate clues */
1.1807 - if (gcisalpha(*(s-1))) { /* it follows a letter - could be either */
1.1808 - guessquote += 1;
1.1809 - if (*(s-1) == 's') { /* looks like a plural apostrophe */
1.1810 - guessquote -= 3;
1.1811 - if (*(s+1) == CHAR_SPACE) /* bonus marks! */
1.1812 - guessquote -= 2;
1.1813 - }
1.1814 - }
1.1815 - else /* it doesn't have a letter either side */
1.1816 - if (strchr(".?!,;:", *(s-1)) && (strchr(".?!,;: ", *(s+1))))
1.1817 - guessquote += 8; /* looks like a closequote */
1.1818 - else
1.1819 - guessquote += 1;
1.1820 - if (open_single_quote > close_single_quote)
1.1821 - guessquote += 1; /* give it the benefit of some doubt - if a squote is already open */
1.1822 - else
1.1823 - guessquote -= 1;
1.1824 - if (guessquote >= 0)
1.1825 - close_single_quote++;
1.1826 - }
1.1827 -
1.1828 - if (*s != CHAR_SPACE
1.1829 - && *s != '-'
1.1830 - && *s != '.'
1.1831 - && *s != CHAR_ASTERISK
1.1832 - && *s != 13
1.1833 - && *s != 10) isemptyline = 0; /* ignore lines like * * * as spacers */
1.1834 - if (*s == CHAR_UNDERSCORE) c_unders++;
1.1835 - if (*s == CHAR_OPEN_CBRACK) c_brack++;
1.1836 - if (*s == CHAR_CLOSE_CBRACK) c_brack--;
1.1837 - if (*s == CHAR_OPEN_RBRACK) r_brack++;
1.1838 - if (*s == CHAR_CLOSE_RBRACK) r_brack--;
1.1839 - if (*s == CHAR_OPEN_SBRACK) s_brack++;
1.1840 - if (*s == CHAR_CLOSE_SBRACK) s_brack--;
1.1841 - s++;
1.1842 - }
1.1843 -
1.1844 - if (isnewpara && !isemptyline) { /* This line is the start of a new paragraph */
1.1845 - start_para_line = linecnt;
1.1846 - strncpy(parastart, aline, 80); /* Capture its first line in case we want to report it later */
1.1847 - parastart[79] = 0;
1.1848 - dquotepar = squotepar = 0; /* restart the quote count 0.98 */
1.1849 - s = aline;
1.1850 - while (!gcisalpha(*s) && !gcisdigit(*s) && *s) s++; /* V.97 fixed bug - overran line and gave false warning - rare */
1.1851 - if (*s >= 'a' && *s <='z') { /* and its first letter is lowercase */
1.1852 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.1853 + }
1.1854 + else if (gcisalpha(*(s-1)) && gcisalpha(*(s+1)))
1.1855 + /* Do nothing! it's definitely an apostrophe, not a quote */
1.1856 + ;
1.1857 + /* it's outside a word - let's check it out */
1.1858 + else if (*s==CHAR_OPEN_SQUOTE || gcisalpha(*(s+1)))
1.1859 + {
1.1860 + /* it damwell better BE an openquote */
1.1861 + if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3))
1.1862 + /* hardcode a very common exception! */
1.1863 + open_single_quote++;
1.1864 + }
1.1865 + else
1.1866 + {
1.1867 + /* now - is it a closequote? */
1.1868 + guessquote=0; /* accumulate clues */
1.1869 + if (gcisalpha(s[-1]))
1.1870 + {
1.1871 + /* it follows a letter - could be either */
1.1872 + guessquote+=1;
1.1873 + if (s[-1]=='s')
1.1874 + {
1.1875 + /* looks like a plural apostrophe */
1.1876 + guessquote-=3;
1.1877 + if (s[1]==CHAR_SPACE) /* bonus marks! */
1.1878 + guessquote-=2;
1.1879 + }
1.1880 + }
1.1881 + /* it doesn't have a letter either side */
1.1882 + else if (strchr(".?!,;:",s[-1]) && strchr(".?!,;: ",s[1]))
1.1883 + guessquote+=8; /* looks like a closequote */
1.1884 + else
1.1885 + guessquote++;
1.1886 + if (open_single_quote>close_single_quote)
1.1887 + /*
1.1888 + * Give it the benefit of some doubt,
1.1889 + * if a squote is already open.
1.1890 + */
1.1891 + guessquote++;
1.1892 + else
1.1893 + guessquote--;
1.1894 + if (guessquote>=0)
1.1895 + close_single_quote++;
1.1896 + }
1.1897 + }
1.1898 + if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK &&
1.1899 + *s!=13 && *s!=10)
1.1900 + isemptyline=0; /* ignore lines like * * * as spacers */
1.1901 + if (*s==CHAR_UNDERSCORE)
1.1902 + c_unders++;
1.1903 + if (*s==CHAR_OPEN_CBRACK)
1.1904 + c_brack++;
1.1905 + if (*s==CHAR_CLOSE_CBRACK)
1.1906 + c_brack--;
1.1907 + if (*s==CHAR_OPEN_RBRACK)
1.1908 + r_brack++;
1.1909 + if (*s==CHAR_CLOSE_RBRACK)
1.1910 + r_brack--;
1.1911 + if (*s==CHAR_OPEN_SBRACK)
1.1912 + s_brack++;
1.1913 + if (*s==CHAR_CLOSE_SBRACK)
1.1914 + s_brack--;
1.1915 + s++;
1.1916 + }
1.1917 + if (isnewpara && !isemptyline)
1.1918 + {
1.1919 + /* This line is the start of a new paragraph. */
1.1920 + start_para_line=linecnt;
1.1921 + /* Capture its first line in case we want to report it later. */
1.1922 + strncpy(parastart,aline,80);
1.1923 + parastart[79]=0;
1.1924 + dquotepar=squotepar=0; /* restart the quote count */
1.1925 + s=aline;
1.1926 + while (!gcisalpha(*s) && !gcisdigit(*s) && *s)
1.1927 + s++;
1.1928 + if (*s>='a' && *s<='z')
1.1929 + {
1.1930 + /* and its first letter is lowercase */
1.1931 + if (pswit[ECHO_SWITCH])
1.1932 + printf("\n%s\n",aline);
1.1933 if (!pswit[OVERVIEW_SWITCH])
1.1934 - printf(" Line %ld column %d - Paragraph starts with lower-case\n", linecnt, (int)(s - aline) +1);
1.1935 + printf(" Line %ld column %d - "
1.1936 + "Paragraph starts with lower-case\n",
1.1937 + linecnt,(int)(s-aline)+1);
1.1938 else
1.1939 cnt_punct++;
1.1940 - }
1.1941 - isnewpara = 0; /* Signal the end of new para processing */
1.1942 - }
1.1943 -
1.1944 - /* Check for an em-dash broken at line end */
1.1945 - if (enddash && *aline == '-') {
1.1946 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.1947 + }
1.1948 + isnewpara=0; /* Signal the end of new para processing. */
1.1949 + }
1.1950 + /* Check for an em-dash broken at line end. */
1.1951 + if (enddash && *aline=='-')
1.1952 + {
1.1953 + if (pswit[ECHO_SWITCH])
1.1954 + printf("\n%s\n",aline);
1.1955 if (!pswit[OVERVIEW_SWITCH])
1.1956 - printf(" Line %ld column 1 - Broken em-dash?\n", linecnt);
1.1957 + printf(" Line %ld column 1 - Broken em-dash?\n",linecnt);
1.1958 else
1.1959 cnt_punct++;
1.1960 - }
1.1961 - enddash = 0;
1.1962 - for (s = aline + strlen(aline) - 1; *s == ' ' && s > aline; s--);
1.1963 - if (s >= aline && *s == '-')
1.1964 - enddash = 1;
1.1965 -
1.1966 -
1.1967 - /* Check for invalid or questionable characters in the line */
1.1968 - /* Anything above 127 is invalid for plain ASCII, and */
1.1969 - /* non-printable control characters should also be flagged. */
1.1970 - /* Tabs should generally not be there. */
1.1971 - /* Jan 06, in 0.99: Hm. For some strange reason, I either */
1.1972 - /* never created or deleted the check for unprintable */
1.1973 - /* control characters. They should be reported even if */
1.1974 - /* warn_bin is on, I think, and in full. */
1.1975 -
1.1976 - for (s = aline; *s; s++) {
1.1977 - i = (unsigned char) *s;
1.1978 - if (i < CHAR_SPACE && i != CHAR_LF && i != CHAR_CR && i != CHAR_TAB) {
1.1979 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.1980 + }
1.1981 + enddash=0;
1.1982 + for (s=aline+strlen(aline)-1;*s==' ' && s>aline;s--)
1.1983 + ;
1.1984 + if (s>=aline && *s=='-')
1.1985 + enddash=1;
1.1986 + /*
1.1987 + * Check for invalid or questionable characters in the line
1.1988 + * Anything above 127 is invalid for plain ASCII, and
1.1989 + * non-printable control characters should also be flagged.
1.1990 + * Tabs should generally not be there.
1.1991 + */
1.1992 + for (s=aline;*s;s++)
1.1993 + {
1.1994 + i=(unsigned char)*s;
1.1995 + if (i<CHAR_SPACE && i!=CHAR_LF && i!=CHAR_CR && i!=CHAR_TAB)
1.1996 + {
1.1997 + if (pswit[ECHO_SWITCH])
1.1998 + printf("\n%s\n",aline);
1.1999 if (!pswit[OVERVIEW_SWITCH])
1.2000 - printf(" Line %ld column %d - Control character %d\n", linecnt, (int) (s - aline) + 1, i);
1.2001 + printf(" Line %ld column %d - Control character %d\n",
1.2002 + linecnt,(int)(s-aline)+1,i);
1.2003 else
1.2004 cnt_bin++;
1.2005 - }
1.2006 - }
1.2007 -
1.2008 - if (warn_bin) {
1.2009 - eNon_A = eTab = eTilde = eCarat = eFSlash = eAst = 0; /* don't repeat multiple warnings on one line */
1.2010 - for (s = aline; *s; s++) {
1.2011 - if (!eNon_A && ((*s < CHAR_SPACE && *s != 9 && *s != '\n') || (unsigned char)*s > 127)) {
1.2012 - i = *s; /* annoying kludge for signed chars */
1.2013 - if (i < 0) i += 256;
1.2014 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2015 + }
1.2016 + }
1.2017 + if (warn_bin)
1.2018 + {
1.2019 + /* Don't repeat multiple warnings on one line. */
1.2020 + eNon_A=eTab=eTilde=eCarat=eFSlash=eAst=0;
1.2021 + for (s=aline;*s;s++)
1.2022 + {
1.2023 + if (!eNon_A &&
1.2024 + (*s<CHAR_SPACE && *s!=9 && *s!='\n' || (unsigned char)*s>127))
1.2025 + {
1.2026 + i=*s; /* annoying kludge for signed chars */
1.2027 + if (i<0)
1.2028 + i+=256;
1.2029 + if (pswit[ECHO_SWITCH])
1.2030 + printf("\n%s\n",aline);
1.2031 if (!pswit[OVERVIEW_SWITCH])
1.2032 - if (i > 127 && i < 160)
1.2033 - printf(" Line %ld column %d - Non-ISO-8859 character %d\n", linecnt, (int) (s - aline) + 1, i);
1.2034 + if (i>127 && i<160)
1.2035 + printf(" Line %ld column %d - "
1.2036 + "Non-ISO-8859 character %d\n",
1.2037 + linecnt,(int)(s-aline)+1,i);
1.2038 else
1.2039 - printf(" Line %ld column %d - Non-ASCII character %d\n", linecnt, (int) (s - aline) + 1, i);
1.2040 + printf(" Line %ld column %d - "
1.2041 + "Non-ASCII character %d\n",
1.2042 + linecnt,(int)(s-aline)+1,i);
1.2043 else
1.2044 cnt_bin++;
1.2045 - eNon_A = 1;
1.2046 - }
1.2047 - if (!eTab && *s == CHAR_TAB) {
1.2048 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2049 + eNon_A=1;
1.2050 + }
1.2051 + if (!eTab && *s==CHAR_TAB)
1.2052 + {
1.2053 + if (pswit[ECHO_SWITCH])
1.2054 + printf("\n%s\n",aline);
1.2055 if (!pswit[OVERVIEW_SWITCH])
1.2056 - printf(" Line %ld column %d - Tab character?\n", linecnt, (int) (s - aline) + 1);
1.2057 + printf(" Line %ld column %d - Tab character?\n",
1.2058 + linecnt,(int)(s-aline)+1);
1.2059 else
1.2060 cnt_odd++;
1.2061 - eTab = 1;
1.2062 - }
1.2063 - if (!eTilde && *s == CHAR_TILDE) { /* often used by OCR software to indicate an unrecognizable character */
1.2064 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2065 + eTab=1;
1.2066 + }
1.2067 + if (!eTilde && *s==CHAR_TILDE)
1.2068 + {
1.2069 + /*
1.2070 + * Often used by OCR software to indicate an
1.2071 + * unrecognizable character.
1.2072 + */
1.2073 + if (pswit[ECHO_SWITCH])
1.2074 + printf("\n%s\n",aline);
1.2075 if (!pswit[OVERVIEW_SWITCH])
1.2076 - printf(" Line %ld column %d - Tilde character?\n", linecnt, (int) (s - aline) + 1);
1.2077 + printf(" Line %ld column %d - Tilde character?\n",
1.2078 + linecnt,(int)(s-aline)+1);
1.2079 else
1.2080 cnt_odd++;
1.2081 - eTilde = 1;
1.2082 - }
1.2083 - if (!eCarat && *s == CHAR_CARAT) {
1.2084 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2085 + eTilde=1;
1.2086 + }
1.2087 + if (!eCarat && *s==CHAR_CARAT)
1.2088 + {
1.2089 + if (pswit[ECHO_SWITCH])
1.2090 + printf("\n%s\n",aline);
1.2091 if (!pswit[OVERVIEW_SWITCH])
1.2092 - printf(" Line %ld column %d - Carat character?\n", linecnt, (int) (s - aline) + 1);
1.2093 + printf(" Line %ld column %d - Carat character?\n",
1.2094 + linecnt,(int)(s-aline)+1);
1.2095 else
1.2096 cnt_odd++;
1.2097 - eCarat = 1;
1.2098 - }
1.2099 - if (!eFSlash && *s == CHAR_FORESLASH && warn_fslash) {
1.2100 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2101 + eCarat=1;
1.2102 + }
1.2103 + if (!eFSlash && *s==CHAR_FORESLASH && warn_fslash)
1.2104 + {
1.2105 + if (pswit[ECHO_SWITCH])
1.2106 + printf("\n%s\n",aline);
1.2107 if (!pswit[OVERVIEW_SWITCH])
1.2108 - printf(" Line %ld column %d - Forward slash?\n", linecnt, (int) (s - aline) + 1);
1.2109 + printf(" Line %ld column %d - Forward slash?\n",
1.2110 + linecnt,(int)(s-aline)+1);
1.2111 else
1.2112 cnt_odd++;
1.2113 - eFSlash = 1;
1.2114 - }
1.2115 - /* report asterisks only in paranoid mode, since they're often deliberate */
1.2116 - if (!eAst && pswit[PARANOID_SWITCH] && warn_ast && !isemptyline && *s == CHAR_ASTERISK) {
1.2117 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2118 + eFSlash=1;
1.2119 + }
1.2120 + /*
1.2121 + * Report asterisks only in paranoid mode,
1.2122 + * since they're often deliberate.
1.2123 + */
1.2124 + if (!eAst && pswit[PARANOID_SWITCH] && warn_ast &&
1.2125 + !isemptyline && *s==CHAR_ASTERISK)
1.2126 + {
1.2127 + if (pswit[ECHO_SWITCH])
1.2128 + printf("\n%s\n",aline);
1.2129 if (!pswit[OVERVIEW_SWITCH])
1.2130 - printf(" Line %ld column %d - Asterisk?\n", linecnt, (int) (s - aline) + 1);
1.2131 + printf(" Line %ld column %d - Asterisk?\n",
1.2132 + linecnt,(int)(s-aline)+1);
1.2133 else
1.2134 cnt_odd++;
1.2135 - eAst = 1;
1.2136 - }
1.2137 - }
1.2138 - }
1.2139 -
1.2140 - /* Check for line too long */
1.2141 - if (warn_long) {
1.2142 - if (strlen(aline) > LONGEST_PG_LINE) {
1.2143 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2144 + eAst=1;
1.2145 + }
1.2146 + }
1.2147 + }
1.2148 + /* Check for line too long. */
1.2149 + if (warn_long)
1.2150 + {
1.2151 + if (strlen(aline)>LONGEST_PG_LINE)
1.2152 + {
1.2153 + if (pswit[ECHO_SWITCH])
1.2154 + printf("\n%s\n",aline);
1.2155 if (!pswit[OVERVIEW_SWITCH])
1.2156 - printf(" Line %ld column %d - Long line %d\n", linecnt, strlen(aline), strlen(aline));
1.2157 + printf(" Line %ld column %d - Long line %d\n",
1.2158 + linecnt,strlen(aline),strlen(aline));
1.2159 else
1.2160 cnt_long++;
1.2161 - }
1.2162 - }
1.2163 -
1.2164 - /* Check for line too short. */
1.2165 - /* This one is a bit trickier to implement: we don't want to */
1.2166 - /* flag the last line of a paragraph for being short, so we */
1.2167 - /* have to wait until we know that our current line is a */
1.2168 - /* "normal" line, then report the _previous_ line if it was too */
1.2169 - /* short. We also don't want to report indented lines like */
1.2170 - /* chapter heads or formatted quotations. We therefore keep */
1.2171 - /* lastlen as the length of the last line examined, and */
1.2172 - /* lastblen as the length of the last but one, and try to */
1.2173 - /* suppress unnecessary warnings by checking that both were of */
1.2174 - /* "normal" length. We keep the first character of the last */
1.2175 - /* line in laststart, and if it was a space, we assume that the */
1.2176 - /* formatting is deliberate. I can't figure out a way to */
1.2177 - /* distinguish something like a quoted verse left-aligned or */
1.2178 - /* the header or footer of a letter from a paragraph of short */
1.2179 - /* lines - maybe if I examined the whole paragraph, and if the */
1.2180 - /* para has less than, say, 8 lines and if all lines are short, */
1.2181 - /* then just assume it's OK? Need to look at some texts to see */
1.2182 - /* how often a formula like this would get the right result. */
1.2183 - /* V0.99 changed the tolerance for length to ignore from 2 to 1 */
1.2184 - if (warn_short) {
1.2185 - if (strlen(aline) > 1
1.2186 - && lastlen > 1 && lastlen < SHORTEST_PG_LINE
1.2187 - && lastblen > 1 && lastblen > SHORTEST_PG_LINE
1.2188 - && laststart != CHAR_SPACE) {
1.2189 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", prevline);
1.2190 + }
1.2191 + }
1.2192 + /*
1.2193 + * Check for line too short.
1.2194 + * This one is a bit trickier to implement: we don't want to
1.2195 + * flag the last line of a paragraph for being short, so we
1.2196 + * have to wait until we know that our current line is a
1.2197 + * "normal" line, then report the _previous_ line if it was too
1.2198 + * short. We also don't want to report indented lines like
1.2199 + * chapter heads or formatted quotations. We therefore keep
1.2200 + * lastlen as the length of the last line examined, and
1.2201 + * lastblen as the length of the last but one, and try to
1.2202 + * suppress unnecessary warnings by checking that both were of
1.2203 + * "normal" length. We keep the first character of the last
1.2204 + * line in laststart, and if it was a space, we assume that the
1.2205 + * formatting is deliberate. I can't figure out a way to
1.2206 + * distinguish something like a quoted verse left-aligned or
1.2207 + * the header or footer of a letter from a paragraph of short
1.2208 + * lines - maybe if I examined the whole paragraph, and if the
1.2209 + * para has less than, say, 8 lines and if all lines are short,
1.2210 + * then just assume it's OK? Need to look at some texts to see
1.2211 + * how often a formula like this would get the right result.
1.2212 + */
1.2213 + if (warn_short && strlen(aline)>1 && lastlen>1 &&
1.2214 + lastlen<SHORTEST_PG_LINE && lastblen>1 && lastblen>SHORTEST_PG_LINE &&
1.2215 + laststart!=CHAR_SPACE)
1.2216 + {
1.2217 + if (pswit[ECHO_SWITCH])
1.2218 + printf("\n%s\n",prevline);
1.2219 + if (!pswit[OVERVIEW_SWITCH])
1.2220 + printf(" Line %ld column %d - Short line %d?\n",
1.2221 + linecnt-1,strlen(prevline),strlen(prevline));
1.2222 + else
1.2223 + cnt_short++;
1.2224 + }
1.2225 + lastblen=lastlen;
1.2226 + lastlen=strlen(aline);
1.2227 + laststart=aline[0];
1.2228 + /* Look for punctuation other than full ellipses at start of line. */
1.2229 + if (*aline && strchr(".?!,;:",aline[0]) && strncmp(". . .",aline,5))
1.2230 + {
1.2231 + if (pswit[ECHO_SWITCH])
1.2232 + printf("\n%s\n",aline);
1.2233 + if (!pswit[OVERVIEW_SWITCH])
1.2234 + printf(" Line %ld column 1 - Begins with punctuation?\n",
1.2235 + linecnt);
1.2236 + else
1.2237 + cnt_punct++;
1.2238 + }
1.2239 + /*
1.2240 + * Check for spaced em-dashes.
1.2241 + * We must check _all_ occurrences of "--" on the line
1.2242 + * hence the loop - even if the first double-dash is OK
1.2243 + * there may be another that's wrong later on.
1.2244 + */
1.2245 + if (warn_dash)
1.2246 + {
1.2247 + s=aline;
1.2248 + while (strstr(s,"--"))
1.2249 + {
1.2250 + if (*(strstr(s,"--")-1)==CHAR_SPACE ||
1.2251 + (*(strstr(s,"--")+2)==CHAR_SPACE))
1.2252 + {
1.2253 + if (pswit[ECHO_SWITCH])
1.2254 + printf("\n%s\n",aline);
1.2255 if (!pswit[OVERVIEW_SWITCH])
1.2256 - printf(" Line %ld column %d - Short line %d?\n", linecnt-1, strlen(prevline), strlen(prevline));
1.2257 + printf(" Line %ld column %d - Spaced em-dash?\n",
1.2258 + linecnt,(int)(strstr(s,"--")-aline)+1);
1.2259 else
1.2260 - cnt_short++;
1.2261 - }
1.2262 - }
1.2263 - lastblen = lastlen;
1.2264 - lastlen = strlen(aline);
1.2265 - laststart = aline[0];
1.2266 -
1.2267 - /* look for punctuation at start of line */
1.2268 - if (*aline && strchr(".?!,;:", aline[0])) { /* if it's punctuation */
1.2269 - if (strncmp(". . .", aline, 5)) { /* exception for ellipsis: V.98 tightened up to except only a full ellipsis */
1.2270 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2271 + cnt_dash++;
1.2272 + }
1.2273 + s=strstr(s,"--")+2;
1.2274 + }
1.2275 + }
1.2276 + /* Check for spaced dashes. */
1.2277 + if (warn_dash)
1.2278 + {
1.2279 + if (strstr(aline," -"))
1.2280 + {
1.2281 + if (*(strstr(aline," -")+2)!='-')
1.2282 + {
1.2283 + if (pswit[ECHO_SWITCH])
1.2284 + printf("\n%s\n",aline);
1.2285 + if (!pswit[OVERVIEW_SWITCH])
1.2286 + printf(" Line %ld column %d - Spaced dash?\n",
1.2287 + linecnt,(int)(strstr(aline," -")-aline)+1);
1.2288 + else
1.2289 + cnt_dash++;
1.2290 + }
1.2291 + }
1.2292 + else if (strstr(aline,"- "))
1.2293 + {
1.2294 + if (*(strstr(aline,"- ")-1)!='-')
1.2295 + {
1.2296 + if (pswit[ECHO_SWITCH])
1.2297 + printf("\n%s\n",aline);
1.2298 + if (!pswit[OVERVIEW_SWITCH])
1.2299 + printf(" Line %ld column %d - Spaced dash?\n",
1.2300 + linecnt,(int)(strstr(aline,"- ")-aline)+1);
1.2301 + else
1.2302 + cnt_dash++;
1.2303 + }
1.2304 + }
1.2305 + }
1.2306 + /*
1.2307 + * Check for unmarked paragraphs indicated by separate speakers.
1.2308 + * May well be false positive:
1.2309 + * "Bravo!" "Wonderful!" called the crowd.
1.2310 + * but useful all the same.
1.2311 + */
1.2312 + s=wrk;
1.2313 + *s=0;
1.2314 + if (strstr(aline,"\" \""))
1.2315 + s=strstr(aline,"\" \"");
1.2316 + if (strstr(aline,"\" \""))
1.2317 + s=strstr(aline,"\" \"");
1.2318 + if (*s)
1.2319 + {
1.2320 + if (pswit[ECHO_SWITCH])
1.2321 + printf("\n%s\n",aline);
1.2322 + if (!pswit[OVERVIEW_SWITCH])
1.2323 + printf(" Line %ld column %d - "
1.2324 + "Query missing paragraph break?\n",
1.2325 + linecnt,(int)(s-aline)+1);
1.2326 + else
1.2327 + cnt_punct++;
1.2328 + }
1.2329 + /*
1.2330 + * Check for "to he" and other easy he/be errors.
1.2331 + * This is a very inadequate effort on the he/be problem,
1.2332 + * but the phrase "to he" is usually an error, whereas "to
1.2333 + * be" is quite common.
1.2334 + * Similarly, '"Quiet!", be said.' is a non-be error
1.2335 + * "to he" is _not_ always an error!:
1.2336 + * "Where they went to he couldn't say."
1.2337 + * Another false positive:
1.2338 + * What would "Cinderella" be without the . . .
1.2339 + * and another: "If he wants to he can see for himself."
1.2340 + */
1.2341 + s=wrk;
1.2342 + *s=0;
1.2343 + if (strstr(aline," to he "))
1.2344 + s=strstr(aline," to he ");
1.2345 + if (strstr(aline,"\" be "))
1.2346 + s=strstr(aline,"\" be ");
1.2347 + if (strstr(aline,"\", be "))
1.2348 + s=strstr(aline,"\", be ");
1.2349 + if (strstr(aline," is be "))
1.2350 + s=strstr(aline," is be ");
1.2351 + if (strstr(aline," be is "))
1.2352 + s=strstr(aline," be is ");
1.2353 + if (strstr(aline," was be "))
1.2354 + s=strstr(aline," was be ");
1.2355 + if (strstr(aline," be would "))
1.2356 + s=strstr(aline," be would ");
1.2357 + if (strstr(aline," be could "))
1.2358 + s=strstr(aline," be could ");
1.2359 + if (*s)
1.2360 + {
1.2361 + if (pswit[ECHO_SWITCH])
1.2362 + printf("\n%s\n",aline);
1.2363 + if (!pswit[OVERVIEW_SWITCH])
1.2364 + printf(" Line %ld column %d - Query he/be error?\n",
1.2365 + linecnt,(int)(s-aline)+1);
1.2366 + else
1.2367 + cnt_word++;
1.2368 + }
1.2369 + s=wrk;
1.2370 + *s=0;
1.2371 + if (strstr(aline," i bad "))
1.2372 + s=strstr(aline," i bad ");
1.2373 + if (strstr(aline," you bad "))
1.2374 + s=strstr(aline," you bad ");
1.2375 + if (strstr(aline," he bad "))
1.2376 + s=strstr(aline," he bad ");
1.2377 + if (strstr(aline," she bad "))
1.2378 + s=strstr(aline," she bad ");
1.2379 + if (strstr(aline," they bad "))
1.2380 + s=strstr(aline," they bad ");
1.2381 + if (strstr(aline," a had "))
1.2382 + s=strstr(aline," a had ");
1.2383 + if (strstr(aline," the had "))
1.2384 + s=strstr(aline," the had ");
1.2385 + if (*s)
1.2386 + {
1.2387 + if (pswit[ECHO_SWITCH])
1.2388 + printf("\n%s\n",aline);
1.2389 + if (!pswit[OVERVIEW_SWITCH])
1.2390 + printf(" Line %ld column %d - Query had/bad error?\n",
1.2391 + linecnt,(int)(s-aline)+1);
1.2392 + else
1.2393 + cnt_word++;
1.2394 + }
1.2395 + s=wrk;
1.2396 + *s=0;
1.2397 + if (strstr(aline,", hut "))
1.2398 + s=strstr(aline,", hut ");
1.2399 + if (strstr(aline,"; hut "))
1.2400 + s=strstr(aline,"; hut ");
1.2401 + if (*s)
1.2402 + {
1.2403 + if (pswit[ECHO_SWITCH])
1.2404 + printf("\n%s\n",aline);
1.2405 + if (!pswit[OVERVIEW_SWITCH])
1.2406 + printf(" Line %ld column %d - Query hut/but error?\n",
1.2407 + linecnt,(int)(s-aline)+1);
1.2408 + else
1.2409 + cnt_word++;
1.2410 + }
1.2411 + /*
1.2412 + * Special case - angled bracket in front of "From" placed there by an
1.2413 + * MTA when sending an e-mail.
1.2414 + */
1.2415 + if (strstr(aline,">From"))
1.2416 + {
1.2417 + if (pswit[ECHO_SWITCH])
1.2418 + printf("\n%s\n",aline);
1.2419 + if (!pswit[OVERVIEW_SWITCH])
1.2420 + printf(" Line %ld column %d - "
1.2421 + "Query angled bracket with From\n",
1.2422 + linecnt,(int)(strstr(aline,">From")-aline)+1);
1.2423 + else
1.2424 + cnt_punct++;
1.2425 + }
1.2426 + /*
1.2427 + * Check for a single character line -
1.2428 + * often an overflow from bad wrapping.
1.2429 + */
1.2430 + if (*aline && !aline[1])
1.2431 + {
1.2432 + if (*aline=='I' || *aline=='V' || *aline=='X' || *aline=='L' ||
1.2433 + gcisdigit(*aline))
1.2434 + ; /* Nothing - ignore numerals alone on a line. */
1.2435 + else
1.2436 + {
1.2437 + if (pswit[ECHO_SWITCH])
1.2438 + printf("\n%s\n",aline);
1.2439 if (!pswit[OVERVIEW_SWITCH])
1.2440 - printf(" Line %ld column 1 - Begins with punctuation?\n", linecnt);
1.2441 + printf(" Line %ld column 1 - "
1.2442 + "Query single character line\n",linecnt);
1.2443 else
1.2444 cnt_punct++;
1.2445 - }
1.2446 - }
1.2447 -
1.2448 - /* Check for spaced em-dashes */
1.2449 - /* V.20 must check _all_ occurrences of "--" on the line */
1.2450 - /* hence the loop - even if the first double-dash is OK */
1.2451 - /* there may be another that's wrong later on. */
1.2452 - if (warn_dash) {
1.2453 - s = aline;
1.2454 - while (strstr(s,"--")) {
1.2455 - if (*(strstr(s, "--")-1) == CHAR_SPACE ||
1.2456 - (*(strstr(s, "--")+2) == CHAR_SPACE)) {
1.2457 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2458 - if (!pswit[OVERVIEW_SWITCH])
1.2459 - printf(" Line %ld column %d - Spaced em-dash?\n", linecnt, (int) (strstr(s,"--") - aline) + 1);
1.2460 - else
1.2461 - cnt_dash++;
1.2462 - }
1.2463 - s = strstr(s,"--") + 2;
1.2464 - }
1.2465 - }
1.2466 -
1.2467 - /* Check for spaced dashes */
1.2468 - if (warn_dash)
1.2469 - if (strstr(aline," -")) {
1.2470 - if (*(strstr(aline, " -")+2) != '-') {
1.2471 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2472 - if (!pswit[OVERVIEW_SWITCH])
1.2473 - printf(" Line %ld column %d - Spaced dash?\n", linecnt, (int) (strstr(aline," -") - aline) + 1);
1.2474 - else
1.2475 - cnt_dash++;
1.2476 - }
1.2477 - }
1.2478 - else
1.2479 - if (strstr(aline,"- ")) {
1.2480 - if (*(strstr(aline, "- ")-1) != '-') {
1.2481 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2482 - if (!pswit[OVERVIEW_SWITCH])
1.2483 - printf(" Line %ld column %d - Spaced dash?\n", linecnt, (int) (strstr(aline,"- ") - aline) + 1);
1.2484 - else
1.2485 - cnt_dash++;
1.2486 - }
1.2487 - }
1.2488 -
1.2489 - /* v 0.99 */
1.2490 - /* Check for unmarked paragraphs indicated by separate speakers */
1.2491 - /* May well be false positive: */
1.2492 - /* "Bravo!" "Wonderful!" called the crowd. */
1.2493 - /* but useful all the same. */
1.2494 - s = wrk;
1.2495 - *s = 0;
1.2496 - if (strstr(aline, "\" \"")) s = strstr(aline, "\" \"");
1.2497 - if (strstr(aline, "\" \"")) s = strstr(aline, "\" \"");
1.2498 - if (*s) {
1.2499 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2500 + }
1.2501 + }
1.2502 + /* Check for I" - often should be ! */
1.2503 + if (strstr(aline," I\""))
1.2504 + {
1.2505 + if (pswit[ECHO_SWITCH])
1.2506 + printf("\n%s\n",aline);
1.2507 if (!pswit[OVERVIEW_SWITCH])
1.2508 - printf(" Line %ld column %d - Query missing paragraph break?\n", linecnt, (int)(s - aline) +1);
1.2509 + printf(" Line %ld column %ld - Query I=exclamation mark?\n",
1.2510 + linecnt,strstr(aline," I\"")-aline);
1.2511 else
1.2512 cnt_punct++;
1.2513 - }
1.2514 -
1.2515 -
1.2516 -
1.2517 - /* Check for "to he" and other easy he/be errors */
1.2518 - /* This is a very inadequate effort on the he/be problem, */
1.2519 - /* but the phrase "to he" is always an error, whereas "to */
1.2520 - /* be" is quite common. I chuckle when it does catch one! */
1.2521 - /* Similarly, '"Quiet!", be said.' is a non-be error */
1.2522 - /* V .18 - "to he" is _not_ always an error!: */
1.2523 - /* "Where they went to he couldn't say." */
1.2524 - /* but I'm leaving it in anyway. */
1.2525 - /* V .20 Another false positive: */
1.2526 - /* What would "Cinderella" be without the . . . */
1.2527 - /* and another "If he wants to he can see for himself." */
1.2528 - /* V .21 Added " is be " and " be is " and " be was " */
1.2529 - /* V .99 Added jeebies code -- removed again. */
1.2530 - /* Is jeebies code worth adding? Rare to see he/be */
1.2531 - /* errors with modern OCR. Separate program? Yes! */
1.2532 - /* jeebies does the job without cluttering up this. */
1.2533 - /* We do get a few more queryable pairs from the */
1.2534 - /* project though -- they're cheap to implement. */
1.2535 - /* Also added a column number for guiguts. */
1.2536 -
1.2537 - s = wrk;
1.2538 - *s = 0;
1.2539 - if (strstr(aline," to he ")) s = strstr(aline," to he ");
1.2540 - if (strstr(aline,"\" be ")) s = strstr(aline,"\" be ");
1.2541 - if (strstr(aline,"\", be ")) s = strstr(aline,"\", be ");
1.2542 - if (strstr(aline," is be ")) s = strstr(aline," is be ");
1.2543 - if (strstr(aline," be is ")) s = strstr(aline," be is ");
1.2544 - if (strstr(aline," was be ")) s = strstr(aline," was be ");
1.2545 - if (strstr(aline," be would ")) s = strstr(aline," be would ");
1.2546 - if (strstr(aline," be could ")) s = strstr(aline," be could ");
1.2547 - if (*s) {
1.2548 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.2549 + }
1.2550 + /*
1.2551 + * Check for period without a capital letter. Cut-down from gutspell.
1.2552 + * Only works when it happens on a single line.
1.2553 + */
1.2554 + if (pswit[PARANOID_SWITCH])
1.2555 + {
1.2556 + for (t=s=aline;strstr(t,". ");)
1.2557 + {
1.2558 + t=strstr(t,". ");
1.2559 + if (t==s)
1.2560 + {
1.2561 + t++;
1.2562 + /* start of line punctuation is handled elsewhere */
1.2563 + continue;
1.2564 + }
1.2565 + if (!gcisalpha(t[-1]))
1.2566 + {
1.2567 + t++;
1.2568 + continue;
1.2569 + }
1.2570 + if (isDutch)
1.2571 + {
1.2572 + /* For Frank & Jeroen -- 's Middags case */
1.2573 + if (t[2]==CHAR_SQUOTE && t[3]>='a' && t[3]<='z' &&
1.2574 + t[4]==CHAR_SPACE && t[5]>='A' && t[5]<='Z')
1.2575 + {
1.2576 + t++;
1.2577 + continue;
1.2578 + }
1.2579 + }
1.2580 + s1=t+2;
1.2581 + while (*s1 && !gcisalpha(*s1) && !isdigit(*s1))
1.2582 + s1++;
1.2583 + if (*s1>='a' && *s1<='z')
1.2584 + {
1.2585 + /* we have something to investigate */
1.2586 + istypo=1;
1.2587 + /* so let's go back and find out */
1.2588 + for (s1=t-1;s1>=s &&
1.2589 + (gcisalpha(*s1) || gcisdigit(*s1) || *s1==CHAR_SQUOTE &&
1.2590 + gcisalpha(s1[1]) && gcisalpha(s1[-1]));s1--)
1.2591 + ;
1.2592 + s1++;
1.2593 + for (i=0;*s1 && *s1!='.';s1++,i++)
1.2594 + testword[i]=*s1;
1.2595 + testword[i]=0;
1.2596 + for (i=0;*abbrev[i];i++)
1.2597 + if (!strcmp(testword,abbrev[i]))
1.2598 + istypo=0;
1.2599 + if (gcisdigit(*testword))
1.2600 + istypo=0;
1.2601 + if (!testword[1])
1.2602 + istypo=0;
1.2603 + if (isroman(testword))
1.2604 + istypo=0;
1.2605 + if (istypo)
1.2606 + {
1.2607 + istypo=0;
1.2608 + for (i=0;testword[i];i++)
1.2609 + if (strchr(vowels,testword[i]))
1.2610 + istypo=1;
1.2611 + }
1.2612 + if (istypo)
1.2613 + {
1.2614 + isdup=0;
1.2615 + if (strlen(testword)<MAX_QWORD_LENGTH &&
1.2616 + !pswit[VERBOSE_SWITCH])
1.2617 + for (i=0;i<qperiod_index;i++)
1.2618 + if (!strcmp(testword,qperiod[i]))
1.2619 + isdup=1;
1.2620 + if (!isdup)
1.2621 + {
1.2622 + if (qperiod_index<MAX_QWORD &&
1.2623 + strlen(testword)<MAX_QWORD_LENGTH)
1.2624 + {
1.2625 + strcpy(qperiod[qperiod_index],testword);
1.2626 + qperiod_index++;
1.2627 + }
1.2628 + if (pswit[ECHO_SWITCH])
1.2629 + printf("\n%s\n",aline);
1.2630 + if (!pswit[OVERVIEW_SWITCH])
1.2631 + printf(" Line %ld column %d - "
1.2632 + "Extra period?\n",linecnt,(int)(t-aline)+1);
1.2633 + else
1.2634 + cnt_punct++;
1.2635 + }
1.2636 + }
1.2637 + }
1.2638 + t++;
1.2639 + }
1.2640 + }
1.2641 + if (pswit[TYPO_SWITCH])
1.2642 + {
1.2643 + /* Check for words usually not followed by punctuation. */
1.2644 + for (s=aline;*s;)
1.2645 + {
1.2646 + wordstart=s;
1.2647 + s=getaword(s,inword);
1.2648 + if (!*inword)
1.2649 + continue;
1.2650 + lowerit(inword);
1.2651 + for (i=0;*nocomma[i];i++)
1.2652 + if (!strcmp(inword,nocomma[i]))
1.2653 + {
1.2654 + if (*s==',' || *s==';' || *s==':')
1.2655 + {
1.2656 + if (pswit[ECHO_SWITCH])
1.2657 + printf("\n%s\n",aline);
1.2658 + if (!pswit[OVERVIEW_SWITCH])
1.2659 + printf(" Line %ld column %d - "
1.2660 + "Query punctuation after %s?\n",
1.2661 + linecnt,(int)(s-aline)+1,inword);
1.2662 + else
1.2663 + cnt_punct++;
1.2664 + }
1.2665 + }
1.2666 + for (i=0;*noperiod[i];i++)
1.2667 + if (!strcmp(inword,noperiod[i]))
1.2668 + {
1.2669 + if (*s=='.' || *s=='!')
1.2670 + {
1.2671 + if (pswit[ECHO_SWITCH])
1.2672 + printf("\n%s\n",aline);
1.2673 + if (!pswit[OVERVIEW_SWITCH])
1.2674 + printf(" Line %ld column %d - "
1.2675 + "Query punctuation after %s?\n",
1.2676 + linecnt,(int)(s-aline)+1,inword);
1.2677 + else
1.2678 + cnt_punct++;
1.2679 + }
1.2680 + }
1.2681 + }
1.2682 + }
1.2683 + /*
1.2684 + * Check for commonly mistyped words,
1.2685 + * and digits like 0 for O in a word.
1.2686 + */
1.2687 + for (s=aline;*s;)
1.2688 + {
1.2689 + wordstart=s;
1.2690 + s=getaword(s,inword);
1.2691 + if (!*inword)
1.2692 + continue; /* don't bother with empty lines */
1.2693 + if (mixdigit(inword))
1.2694 + {
1.2695 + if (pswit[ECHO_SWITCH])
1.2696 + printf("\n%s\n",aline);
1.2697 + if (!pswit[OVERVIEW_SWITCH])
1.2698 + printf(" Line %ld column %ld - Query digit in %s\n",
1.2699 + linecnt,(int)(wordstart-aline)+1,inword);
1.2700 + else
1.2701 + cnt_word++;
1.2702 + }
1.2703 + /*
1.2704 + * Put the word through a series of tests for likely typos and OCR
1.2705 + * errors.
1.2706 + */
1.2707 + if (pswit[TYPO_SWITCH])
1.2708 + {
1.2709 + istypo=0;
1.2710 + strcpy(testword,inword);
1.2711 + alower=0;
1.2712 + for (i=0;i<(signed int)strlen(testword);i++)
1.2713 + {
1.2714 + /* lowercase for testing */
1.2715 + if (testword[i]>='a' && testword[i]<='z')
1.2716 + alower=1;
1.2717 + if (alower && testword[i]>='A' && testword[i]<='Z')
1.2718 + {
1.2719 + /*
1.2720 + * We have an uppercase mid-word. However, there are
1.2721 + * common cases:
1.2722 + * Mac and Mc like McGill
1.2723 + * French contractions like l'Abbe
1.2724 + */
1.2725 + if (i==2 && testword[0]=='m' && testword[1]=='c' ||
1.2726 + i==3 && testword[0]=='m' && testword[1]=='a' &&
1.2727 + testword[2]=='c' || i>0 && testword[i-1]==CHAR_SQUOTE)
1.2728 + ; /* do nothing! */
1.2729 + else
1.2730 + istypo=1;
1.2731 + }
1.2732 + testword[i]=(char)tolower(testword[i]);
1.2733 + }
1.2734 + /*
1.2735 + * Check for certain unlikely two-letter combinations at word
1.2736 + * start and end.
1.2737 + */
1.2738 + if (strlen(testword)>1)
1.2739 + {
1.2740 + for (i=0;*nostart[i];i++)
1.2741 + if (!strncmp(testword,nostart[i],2))
1.2742 + istypo=1;
1.2743 + for (i=0;*noend[i];i++)
1.2744 + if (!strncmp(testword+strlen(testword)-2,noend[i],2))
1.2745 + istypo=1;
1.2746 + }
1.2747 + /* ght is common, gbt never. Like that. */
1.2748 + if (strstr(testword,"cb"))
1.2749 + istypo=1;
1.2750 + if (strstr(testword,"gbt"))
1.2751 + istypo=1;
1.2752 + if (strstr(testword,"pbt"))
1.2753 + istypo=1;
1.2754 + if (strstr(testword,"tbs"))
1.2755 + istypo=1;
1.2756 + if (strstr(testword,"mrn"))
1.2757 + istypo=1;
1.2758 + if (strstr(testword,"ahle"))
1.2759 + istypo=1;
1.2760 + if (strstr(testword,"ihle"))
1.2761 + istypo=1;
1.2762 + /*
1.2763 + * "TBE" does happen - like HEARTBEAT - but uncommon.
1.2764 + * Also "TBI" - frostbite, outbid - but uncommon.
1.2765 + * Similarly "ii" like Hawaii, or Pompeii, and in Roman
1.2766 + * numerals, but "ii" is a common scanno.
1.2767 + */
1.2768 + if (strstr(testword,"tbi"))
1.2769 + istypo=1;
1.2770 + if (strstr(testword,"tbe"))
1.2771 + istypo=1;
1.2772 + if (strstr(testword,"ii"))
1.2773 + istypo=1;
1.2774 + /*
1.2775 + * Check for no vowels or no consonants.
1.2776 + * If none, flag a typo.
1.2777 + */
1.2778 + if (!istypo && strlen(testword)>1)
1.2779 + {
1.2780 + vowel=consonant=0;
1.2781 + for (i=0;testword[i];i++)
1.2782 + {
1.2783 + if (testword[i]=='y' || gcisdigit(testword[i]))
1.2784 + {
1.2785 + /* Yah, this is loose. */
1.2786 + vowel++;
1.2787 + consonant++;
1.2788 + }
1.2789 + else if (strchr(vowels,testword[i]))
1.2790 + vowel++;
1.2791 + else
1.2792 + consonant++;
1.2793 + }
1.2794 + if (!vowel || !consonant)
1.2795 + istypo=1;
1.2796 + }
1.2797 + /*
1.2798 + * Now exclude the word from being reported if it's in
1.2799 + * the okword list.
1.2800 + */
1.2801 + for (i=0;*okword[i];i++)
1.2802 + if (!strcmp(testword,okword[i]))
1.2803 + istypo=0;
1.2804 + /*
1.2805 + * What looks like a typo may be a Roman numeral.
1.2806 + * Exclude these.
1.2807 + */
1.2808 + if (istypo && isroman(testword))
1.2809 + istypo=0;
1.2810 + /* Check the manual list of typos. */
1.2811 + if (!istypo)
1.2812 + for (i=0;*typo[i];i++)
1.2813 + if (!strcmp(testword,typo[i]))
1.2814 + istypo=1;
1.2815 + /*
1.2816 + * Check lowercase s, l, i and m - special cases.
1.2817 + * "j" - often a semi-colon gone wrong.
1.2818 + * "d" for a missing apostrophe - he d
1.2819 + * "n" for "in"
1.2820 + */
1.2821 + if (!istypo && strlen(testword)==1 && strchr("slmijdn",*inword))
1.2822 + istypo=1;
1.2823 + if (istypo)
1.2824 + {
1.2825 + isdup=0;
1.2826 + if (strlen(testword)<MAX_QWORD_LENGTH &&
1.2827 + !pswit[VERBOSE_SWITCH])
1.2828 + for (i=0;i<qword_index;i++)
1.2829 + if (!strcmp(testword,qword[i]))
1.2830 + {
1.2831 + isdup=1;
1.2832 + ++dupcnt[i];
1.2833 + }
1.2834 + if (!isdup)
1.2835 + {
1.2836 + if (qword_index<MAX_QWORD &&
1.2837 + strlen(testword)<MAX_QWORD_LENGTH)
1.2838 + {
1.2839 + strcpy(qword[qword_index],testword);
1.2840 + qword_index++;
1.2841 + }
1.2842 + if (pswit[ECHO_SWITCH])
1.2843 + printf("\n%s\n",aline);
1.2844 + if (!pswit[OVERVIEW_SWITCH])
1.2845 + {
1.2846 + printf(" Line %ld column %d - Query word %s",
1.2847 + linecnt,(int)(wordstart-aline)+1,inword);
1.2848 + if (strlen(testword)<MAX_QWORD_LENGTH &&
1.2849 + !pswit[VERBOSE_SWITCH])
1.2850 + printf(" - not reporting duplicates");
1.2851 + printf("\n");
1.2852 + }
1.2853 + else
1.2854 + cnt_word++;
1.2855 + }
1.2856 + }
1.2857 + }
1.2858 + /* check the user's list of typos */
1.2859 + if (!istypo && usertypo_count)
1.2860 + for (i=0;i<usertypo_count;i++)
1.2861 + if (!strcmp(testword,usertypo[i]))
1.2862 + {
1.2863 + if (pswit[ECHO_SWITCH])
1.2864 + printf("\n%s\n",aline);
1.2865 + if (!pswit[OVERVIEW_SWITCH])
1.2866 + printf(" Line %ld column %d - "
1.2867 + "Query possible scanno %s\n",
1.2868 + linecnt,(int)(wordstart-aline)+2,inword);
1.2869 + }
1.2870 + if (pswit[PARANOID_SWITCH] && warn_digit)
1.2871 + {
1.2872 + /* In paranoid mode, query all 0 and 1 standing alone. */
1.2873 + if (!strcmp(inword,"0") || !strcmp(inword,"1"))
1.2874 + {
1.2875 + if (pswit[ECHO_SWITCH])
1.2876 + printf("\n%s\n",aline);
1.2877 + if (!pswit[OVERVIEW_SWITCH])
1.2878 + printf(" Line %ld column %d - Query standalone %s\n",
1.2879 + linecnt,(int)(wordstart-aline)+2,inword);
1.2880 + else
1.2881 + cnt_word++;
1.2882 + }
1.2883 + }
1.2884 + }
1.2885 + /*
1.2886 + * Look for added or missing spaces around punctuation and quotes.
1.2887 + * If there is a punctuation character like ! with no space on
1.2888 + * either side, suspect a missing!space. If there are spaces on
1.2889 + * both sides , assume a typo. If we see a double quote with no
1.2890 + * space or punctuation on either side of it, assume unspaced
1.2891 + * quotes "like"this.
1.2892 + */
1.2893 + llen=strlen(aline);
1.2894 + for (i=1;i<llen;i++)
1.2895 + {
1.2896 + /* For each character in the line after the first. */
1.2897 + if (strchr(".?!,;:_",aline[i])) /* if it's punctuation */
1.2898 + {
1.2899 + /* we need to suppress warnings for acronyms like M.D. */
1.2900 + isacro=0;
1.2901 + /* we need to suppress warnings for ellipsis . . . */
1.2902 + isellipsis=0;
1.2903 + /* if there are letters on both sides of it or ... */
1.2904 + if (gcisalpha(aline[i-1]) && gcisalpha(aline[i+1]) ||
1.2905 + gcisalpha(aline[i+1]) && strchr("?!,;:",aline[i]))
1.2906 + {
1.2907 + /* ...if it's strict punctuation followed by an alpha */
1.2908 + if (aline[i]=='.')
1.2909 + {
1.2910 + if (i>2 && aline[i-2]=='.')
1.2911 + isacro=1;
1.2912 + if (i+2<llen && aline[i+2]=='.')
1.2913 + isacro=1;
1.2914 + }
1.2915 + if (!isacro)
1.2916 + {
1.2917 + if (pswit[ECHO_SWITCH])
1.2918 + printf("\n%s\n",aline);
1.2919 + if (!pswit[OVERVIEW_SWITCH])
1.2920 + printf(" Line %ld column %d - Missing space?\n",
1.2921 + linecnt,i+1);
1.2922 + else
1.2923 + cnt_punct++;
1.2924 + }
1.2925 + }
1.2926 + if (aline[i-1]==CHAR_SPACE &&
1.2927 + (aline[i+1]==CHAR_SPACE || aline[i+1]==0))
1.2928 + {
1.2929 + /*
1.2930 + * If there are spaces on both sides,
1.2931 + * or space before and end of line.
1.2932 + */
1.2933 + if (aline[i]=='.')
1.2934 + {
1.2935 + if (i>2 && aline[i-2]=='.')
1.2936 + isellipsis=1;
1.2937 + if (i+2<llen && aline[i+2]=='.')
1.2938 + isellipsis=1;
1.2939 + }
1.2940 + if (!isemptyline && !isellipsis)
1.2941 + {
1.2942 + if (pswit[ECHO_SWITCH])
1.2943 + printf("\n%s\n",aline);
1.2944 + if (!pswit[OVERVIEW_SWITCH])
1.2945 + printf(" Line %ld column %d - "
1.2946 + "Spaced punctuation?\n",linecnt,i+1);
1.2947 + else
1.2948 + cnt_punct++;
1.2949 + }
1.2950 + }
1.2951 + }
1.2952 + }
1.2953 + /* Split out the characters that CANNOT be preceded by space. */
1.2954 + llen=strlen(aline);
1.2955 + for (i=1;i<llen;i++)
1.2956 + {
1.2957 + /* for each character in the line after the first */
1.2958 + if (strchr("?!,;:",aline[i]))
1.2959 + {
1.2960 + /* if it's punctuation that _cannot_ have a space before it */
1.2961 + if (aline[i-1]==CHAR_SPACE && !isemptyline &&
1.2962 + aline[i+1]!=CHAR_SPACE)
1.2963 + {
1.2964 + /*
1.2965 + * If aline[i+1] DOES == space,
1.2966 + * it was already reported just above.
1.2967 + */
1.2968 + if (pswit[ECHO_SWITCH])
1.2969 + printf("\n%s\n",aline);
1.2970 + if (!pswit[OVERVIEW_SWITCH])
1.2971 + printf(" Line %ld column %d - Spaced punctuation?\n",
1.2972 + linecnt,i+1);
1.2973 + else
1.2974 + cnt_punct++;
1.2975 + }
1.2976 + }
1.2977 + }
1.2978 + /*
1.2979 + * Special case " .X" where X is any alpha.
1.2980 + * This plugs a hole in the acronym code above.
1.2981 + * Inelegant, but maintainable.
1.2982 + */
1.2983 + llen=strlen(aline);
1.2984 + for (i=1;i<llen;i++)
1.2985 + {
1.2986 + /* for each character in the line after the first */
1.2987 + if (aline[i]=='.')
1.2988 + {
1.2989 + /* if it's a period */
1.2990 + if (aline[i-1]==CHAR_SPACE && gcisalpha(aline[i+1]))
1.2991 + {
1.2992 + /*
1.2993 + * If the period follows a space and
1.2994 + * is followed by a letter.
1.2995 + */
1.2996 + if (pswit[ECHO_SWITCH])
1.2997 + printf("\n%s\n",aline);
1.2998 + if (!pswit[OVERVIEW_SWITCH])
1.2999 + printf(" Line %ld column %d - Spaced punctuation?\n",
1.3000 + linecnt,i+1);
1.3001 + else
1.3002 + cnt_punct++;
1.3003 + }
1.3004 + }
1.3005 + }
1.3006 + for (i=1;i<llen;i++)
1.3007 + {
1.3008 + /* for each character in the line after the first */
1.3009 + if (aline[i]==CHAR_DQUOTE)
1.3010 + {
1.3011 + if (!strchr(" _-.'`,;:!/([{?}])",aline[i-1]) &&
1.3012 + !strchr(" _-.'`,;:!/([{?}])",aline[i+1]) && aline[i+1] ||
1.3013 + !strchr(" _-([{'`",aline[i-1]) && gcisalpha(aline[i+1]))
1.3014 + {
1.3015 + if (pswit[ECHO_SWITCH])
1.3016 + printf("\n%s\n",aline);
1.3017 + if (!pswit[OVERVIEW_SWITCH])
1.3018 + printf(" Line %ld column %d - Unspaced quotes?\n",
1.3019 + linecnt,i+1);
1.3020 + else
1.3021 + cnt_punct++;
1.3022 + }
1.3023 + }
1.3024 + }
1.3025 + /* Check parity of quotes. */
1.3026 + for (s=aline;*s;s++)
1.3027 + {
1.3028 + if (*s==CHAR_DQUOTE)
1.3029 + {
1.3030 + if (!(dquotepar=!dquotepar))
1.3031 + {
1.3032 + /* parity even */
1.3033 + if (!strchr("_-.'`/,;:!?)]} ",s[1]))
1.3034 + {
1.3035 + if (pswit[ECHO_SWITCH])
1.3036 + printf("\n%s\n",aline);
1.3037 + if (!pswit[OVERVIEW_SWITCH])
1.3038 + printf(" Line %ld column %d - "
1.3039 + "Wrongspaced quotes?\n",linecnt,(int)(s-aline)+1);
1.3040 + else
1.3041 + cnt_punct++;
1.3042 + }
1.3043 + }
1.3044 + else
1.3045 + {
1.3046 + /* parity odd */
1.3047 + if (!gcisalpha(s[1]) && !isdigit(s[1]) &&
1.3048 + !strchr("_-/.'`([{$",s[1]) || !s[1])
1.3049 + {
1.3050 + if (pswit[ECHO_SWITCH])
1.3051 + printf("\n%s\n",aline);
1.3052 + if (!pswit[OVERVIEW_SWITCH])
1.3053 + printf(" Line %ld column %d - "
1.3054 + "Wrongspaced quotes?\n",linecnt,(int)(s-aline)+1);
1.3055 + else
1.3056 + cnt_punct++;
1.3057 + }
1.3058 + }
1.3059 + }
1.3060 + }
1.3061 + if (*aline==CHAR_DQUOTE)
1.3062 + {
1.3063 + if (strchr(",;:!?)]} ",aline[1]))
1.3064 + {
1.3065 + if (pswit[ECHO_SWITCH])
1.3066 + printf("\n%s\n",aline);
1.3067 + if (!pswit[OVERVIEW_SWITCH])
1.3068 + printf(" Line %ld column 1 - Wrongspaced quotes?\n",
1.3069 + linecnt,(int)(s-aline)+1);
1.3070 + else
1.3071 + cnt_punct++;
1.3072 + }
1.3073 + }
1.3074 + if (pswit[SQUOTE_SWITCH])
1.3075 + {
1.3076 + for (s=aline;*s;s++)
1.3077 + {
1.3078 + if ((*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE) &&
1.3079 + (s==aline || s>aline && !gcisalpha(s[-1]) ||
1.3080 + !gcisalpha(s[1])))
1.3081 + {
1.3082 + if (!(squotepar=!squotepar))
1.3083 + {
1.3084 + /* parity even */
1.3085 + if (!strchr("_-.'`/\",;:!?)]} ",s[1]))
1.3086 + {
1.3087 + if (pswit[ECHO_SWITCH])
1.3088 + printf("\n%s\n",aline);
1.3089 + if (!pswit[OVERVIEW_SWITCH])
1.3090 + printf(" Line %ld column %d - "
1.3091 + "Wrongspaced singlequotes?\n",
1.3092 + linecnt,(int)(s-aline)+1);
1.3093 + else
1.3094 + cnt_punct++;
1.3095 + }
1.3096 + }
1.3097 + else
1.3098 + {
1.3099 + /* parity odd */
1.3100 + if (!gcisalpha(s[1]) && !isdigit(s[1]) &&
1.3101 + !strchr("_-/\".'`",s[1]) || !s[1])
1.3102 + {
1.3103 + if (pswit[ECHO_SWITCH])
1.3104 + printf("\n%s\n",aline);
1.3105 + if (!pswit[OVERVIEW_SWITCH])
1.3106 + printf(" Line %ld column %d - "
1.3107 + "Wrongspaced singlequotes?\n",
1.3108 + linecnt,(int)(s-aline)+1);
1.3109 + else
1.3110 + cnt_punct++;
1.3111 + }
1.3112 + }
1.3113 + }
1.3114 + }
1.3115 + }
1.3116 + /*
1.3117 + * Look for double punctuation like ,. or ,,
1.3118 + * Thanks to DW for the suggestion!
1.3119 + * In books with references, ".," and ".;" are common
1.3120 + * e.g. "etc., etc.," and vol. 1.; vol 3.;
1.3121 + * OTOH, from my initial tests, there are also fairly
1.3122 + * common errors. What to do? Make these cases paranoid?
1.3123 + * ".," is the most common, so warn_dotcomma is used
1.3124 + * to suppress detailed reporting if it occurs often.
1.3125 + */
1.3126 + llen=strlen(aline);
1.3127 + for (i=0;i<llen;i++)
1.3128 + {
1.3129 + /* for each punctuation character in the line */
1.3130 + if (strchr(".?!,;:",aline[i]) && (strchr(".?!,;:",aline[i+1])) &&
1.3131 + aline[i] && aline[i+1])
1.3132 + {
1.3133 + /* followed by punctuation, it's a query, unless . . . */
1.3134 + if (aline[i]==aline[i+1] && (aline[i]=='.' || aline[i]=='?' ||
1.3135 + aline[i]=='!') ||
1.3136 + !warn_dotcomma && aline[i]=='.' && aline[i+1]==',' ||
1.3137 + isFrench && !strncmp(aline+i,",...",4) ||
1.3138 + isFrench && !strncmp(aline+i,"...,",4) ||
1.3139 + isFrench && !strncmp(aline+i,";...",4) ||
1.3140 + isFrench && !strncmp(aline+i,"...;",4) ||
1.3141 + isFrench && !strncmp(aline+i,":...",4) ||
1.3142 + isFrench && !strncmp(aline+i,"...:",4) ||
1.3143 + isFrench && !strncmp(aline+i,"!...",4) ||
1.3144 + isFrench && !strncmp(aline+i,"...!",4) ||
1.3145 + isFrench && !strncmp(aline+i,"?...",4) ||
1.3146 + isFrench && !strncmp(aline+i,"...?",4))
1.3147 + {
1.3148 + if (isFrench && !strncmp(aline+i,",...",4) ||
1.3149 + isFrench && !strncmp(aline+i,"...,",4) ||
1.3150 + isFrench && !strncmp(aline+i,";...",4) ||
1.3151 + isFrench && !strncmp(aline+i,"...;",4) ||
1.3152 + isFrench && !strncmp(aline+i,":...",4) ||
1.3153 + isFrench && !strncmp(aline+i,"...:",4) ||
1.3154 + isFrench && !strncmp(aline+i,"!...",4) ||
1.3155 + isFrench && !strncmp(aline+i,"...!",4) ||
1.3156 + isFrench && !strncmp(aline+i,"?...",4) ||
1.3157 + isFrench && !strncmp(aline+i,"...?",4))
1.3158 + i+=4;
1.3159 + ; /* do nothing for .. !! and ?? which can be legit */
1.3160 + }
1.3161 + else
1.3162 + {
1.3163 + if (pswit[ECHO_SWITCH])
1.3164 + printf("\n%s\n",aline);
1.3165 + if (!pswit[OVERVIEW_SWITCH])
1.3166 + printf(" Line %ld column %d - Double punctuation?\n",
1.3167 + linecnt,i+1);
1.3168 + else
1.3169 + cnt_punct++;
1.3170 + }
1.3171 + }
1.3172 + }
1.3173 + s=aline;
1.3174 + while (strstr(s," \" "))
1.3175 + {
1.3176 + if (pswit[ECHO_SWITCH])
1.3177 + printf("\n%s\n",aline);
1.3178 if (!pswit[OVERVIEW_SWITCH])
1.3179 - printf(" Line %ld column %d - Query he/be error?\n", linecnt, (int)(s - aline) +1);
1.3180 - else
1.3181 - cnt_word++;
1.3182 - }
1.3183 -
1.3184 - s = wrk;
1.3185 - *s = 0;
1.3186 - if (strstr(aline," i bad ")) s = strstr(aline," i bad ");
1.3187 - if (strstr(aline," you bad ")) s = strstr(aline," you bad ");
1.3188 - if (strstr(aline," he bad ")) s = strstr(aline," he bad ");
1.3189 - if (strstr(aline," she bad ")) s = strstr(aline," she bad ");
1.3190 - if (strstr(aline," they bad ")) s = strstr(aline," they bad ");
1.3191 - if (strstr(aline," a had ")) s = strstr(aline," a had ");
1.3192 - if (strstr(aline," the had ")) s = strstr(aline," the had ");
1.3193 - if (*s) {
1.3194 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3195 - if (!pswit[OVERVIEW_SWITCH])
1.3196 - printf(" Line %ld column %d - Query had/bad error?\n", linecnt, (int)(s - aline) +1);
1.3197 - else
1.3198 - cnt_word++;
1.3199 - }
1.3200 -
1.3201 -
1.3202 - /* V .97 Added ", hut " Not too common, hut pretty certain */
1.3203 - /* V.99 changed to add a column number for guiguts */
1.3204 - s = wrk;
1.3205 - *s = 0;
1.3206 - if (strstr(aline,", hut ")) s = strstr(aline,", hut ");
1.3207 - if (strstr(aline,"; hut ")) s = strstr(aline,"; hut ");
1.3208 - if (*s) {
1.3209 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3210 - if (!pswit[OVERVIEW_SWITCH])
1.3211 - printf(" Line %ld column %d - Query hut/but error?\n", linecnt, (int)(s - aline) +1);
1.3212 - else
1.3213 - cnt_word++;
1.3214 - }
1.3215 -
1.3216 - /* Special case - angled bracket in front of "From" placed there by an MTA */
1.3217 - /* when sending an e-mail. V .21 */
1.3218 - if (strstr(aline, ">From")) {
1.3219 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3220 - if (!pswit[OVERVIEW_SWITCH])
1.3221 - printf(" Line %ld column %d - Query angled bracket with From\n", linecnt, (int)(strstr(aline, ">From") - aline) +1);
1.3222 + printf(" Line %ld column %d - Spaced doublequote?\n",
1.3223 + linecnt,(int)(strstr(s," \" ")-aline+1));
1.3224 else
1.3225 cnt_punct++;
1.3226 - }
1.3227 -
1.3228 - /* V 0.98 Check for a single character line - often an overflow from bad wrapping. */
1.3229 - if (*aline && !*(aline+1)) {
1.3230 - if (*aline == 'I' || *aline == 'V' || *aline == 'X' || *aline == 'L' || gcisdigit(*aline))
1.3231 - ; /* nothing - ignore numerals alone on a line. */
1.3232 - else {
1.3233 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3234 + s=strstr(s," \" ")+2;
1.3235 + }
1.3236 + s=aline;
1.3237 + while (strstr(s," ' "))
1.3238 + {
1.3239 + if (pswit[ECHO_SWITCH])
1.3240 + printf("\n%s\n",aline);
1.3241 + if (!pswit[OVERVIEW_SWITCH])
1.3242 + printf(" Line %ld column %d - Spaced singlequote?\n",
1.3243 + linecnt,(int)(strstr(s," ' ")-aline+1));
1.3244 + else
1.3245 + cnt_punct++;
1.3246 + s=strstr(s," ' ")+2;
1.3247 + }
1.3248 + s=aline;
1.3249 + while (strstr(s," ` "))
1.3250 + {
1.3251 + if (pswit[ECHO_SWITCH])
1.3252 + printf("\n%s\n",aline);
1.3253 + if (!pswit[OVERVIEW_SWITCH])
1.3254 + printf(" Line %ld column %d - Spaced singlequote?\n",
1.3255 + linecnt,(int)(strstr(s," ` ")-aline+1));
1.3256 + else
1.3257 + cnt_punct++;
1.3258 + s=strstr(s," ` ")+2;
1.3259 + }
1.3260 + /* check special case of 'S instead of 's at end of word */
1.3261 + s=aline+1;
1.3262 + while (*s)
1.3263 + {
1.3264 + if (*s==CHAR_SQUOTE && s[1]=='S' && s[-1]>='a' && s[-1]<='z')
1.3265 + {
1.3266 + if (pswit[ECHO_SWITCH])
1.3267 + printf("\n%s\n",aline);
1.3268 if (!pswit[OVERVIEW_SWITCH])
1.3269 - printf(" Line %ld column 1 - Query single character line\n", linecnt);
1.3270 + printf(" Line %ld column %d - Capital \"S\"?\n",
1.3271 + linecnt,(int)(s-aline+2));
1.3272 else
1.3273 cnt_punct++;
1.3274 - }
1.3275 - }
1.3276 -
1.3277 - /* V 0.98 Check for I" - often should be ! */
1.3278 - if (strstr(aline, " I\"")) {
1.3279 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3280 - if (!pswit[OVERVIEW_SWITCH])
1.3281 - printf(" Line %ld column %ld - Query I=exclamation mark?\n", linecnt, strstr(aline, " I\"") - aline);
1.3282 - else
1.3283 - cnt_punct++;
1.3284 - }
1.3285 -
1.3286 - /* V 0.98 Check for period without a capital letter. Cut-down from gutspell */
1.3287 - /* Only works when it happens on a single line. */
1.3288 -
1.3289 - if (pswit[PARANOID_SWITCH])
1.3290 - for (t = s = aline; strstr(t,". ");) {
1.3291 - t = strstr(t, ". ");
1.3292 - if (t == s) {
1.3293 - t++;
1.3294 - continue; /* start of line punctuation is handled elsewhere */
1.3295 - }
1.3296 - if (!gcisalpha(*(t-1))) {
1.3297 - t++;
1.3298 - continue;
1.3299 - }
1.3300 - if (isDutch) { /* For Frank & Jeroen -- 's Middags case */
1.3301 - if (*(t+2) == CHAR_SQUOTE &&
1.3302 - *(t+3)>='a' && *(t+3)<='z' &&
1.3303 - *(t+4) == CHAR_SPACE &&
1.3304 - *(t+5)>='A' && *(t+5)<='Z') {
1.3305 - t++;
1.3306 - continue;
1.3307 - }
1.3308 - }
1.3309 - s1 = t+2;
1.3310 - while (*s1 && !gcisalpha(*s1) && !isdigit(*s1))
1.3311 - s1++;
1.3312 - if (*s1 >= 'a' && *s1 <= 'z') { /* we have something to investigate */
1.3313 - istypo = 1;
1.3314 - for (s1 = t - 1; s1 >= s &&
1.3315 - (gcisalpha(*s1) || gcisdigit(*s1) ||
1.3316 - (*s1 == CHAR_SQUOTE && gcisalpha(*(s1+1)) && gcisalpha(*(s1-1)))); s1--); /* so let's go back and find out */
1.3317 - s1++;
1.3318 - for (i = 0; *s1 && *s1 != '.'; s1++, i++)
1.3319 - testword[i] = *s1;
1.3320 - testword[i] = 0;
1.3321 - for (i = 0; *abbrev[i]; i++)
1.3322 - if (!strcmp(testword, abbrev[i]))
1.3323 - istypo = 0;
1.3324 -// if (*testword >= 'A' && *testword <= 'Z')
1.3325 -// istypo = 0;
1.3326 - if (gcisdigit(*testword)) istypo = 0;
1.3327 - if (!*(testword+1)) istypo = 0;
1.3328 - if (isroman(testword)) istypo = 0;
1.3329 - if (istypo) {
1.3330 - istypo = 0;
1.3331 - for (i = 0; testword[i]; i++)
1.3332 - if (strchr(vowels, testword[i]))
1.3333 - istypo = 1;
1.3334 - }
1.3335 - if (istypo) {
1.3336 - isdup = 0;
1.3337 - if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
1.3338 - for (i = 0; i < qperiod_index; i++)
1.3339 - if (!strcmp(testword, qperiod[i])) {
1.3340 - isdup = 1;
1.3341 - }
1.3342 - if (!isdup) {
1.3343 - if (qperiod_index < MAX_QWORD && strlen(testword) < MAX_QWORD_LENGTH) {
1.3344 - strcpy(qperiod[qperiod_index], testword);
1.3345 - qperiod_index++;
1.3346 - }
1.3347 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3348 - if (!pswit[OVERVIEW_SWITCH])
1.3349 - printf(" Line %ld column %d - Extra period?\n", linecnt, (int)(t - aline)+1);
1.3350 - else
1.3351 - cnt_punct++;
1.3352 - }
1.3353 - }
1.3354 - }
1.3355 - t++;
1.3356 - }
1.3357 -
1.3358 -
1.3359 - if (pswit[TYPO_SWITCH]) { /* Should have put this condition in at the start of 0.99. Duh! */
1.3360 - /* Check for words usually not followed by punctuation 0.99 */
1.3361 - for (s = aline; *s;) {
1.3362 - wordstart = s;
1.3363 - s = getaword(s, inword);
1.3364 - if (!*inword) continue;
1.3365 - lowerit(inword);
1.3366 - for (i = 0; *nocomma[i]; i++)
1.3367 - if (!strcmp(inword, nocomma[i])) {
1.3368 - if (*s == ',' || *s == ';' || *s == ':') {
1.3369 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3370 - if (!pswit[OVERVIEW_SWITCH])
1.3371 - printf(" Line %ld column %d - Query punctuation after %s?\n", linecnt, (int)(s - aline)+1, inword);
1.3372 - else
1.3373 - cnt_punct++;
1.3374 - }
1.3375 - }
1.3376 - for (i = 0; *noperiod[i]; i++)
1.3377 - if (!strcmp(inword, noperiod[i])) {
1.3378 - if (*s == '.' || *s == '!') {
1.3379 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3380 - if (!pswit[OVERVIEW_SWITCH])
1.3381 - printf(" Line %ld column %d - Query punctuation after %s?\n", linecnt, (int)(s - aline)+1, inword);
1.3382 - else
1.3383 - cnt_punct++;
1.3384 - }
1.3385 - }
1.3386 - }
1.3387 - }
1.3388 -
1.3389 -
1.3390 -
1.3391 - /* Check for commonly mistyped words, and digits like 0 for O in a word */
1.3392 - for (s = aline; *s;) {
1.3393 - wordstart = s;
1.3394 - s = getaword(s, inword);
1.3395 - if (!*inword) continue; /* don't bother with empty lines */
1.3396 - if (mixdigit(inword)) {
1.3397 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3398 - if (!pswit[OVERVIEW_SWITCH])
1.3399 - printf(" Line %ld column %ld - Query digit in %s\n", linecnt, (int)(wordstart - aline) + 1, inword);
1.3400 - else
1.3401 - cnt_word++;
1.3402 - }
1.3403 -
1.3404 - /* put the word through a series of tests for likely typos and OCR errors */
1.3405 - /* V.21 I had allowed lots of typo-checking even with the typo switch */
1.3406 - /* turned off, but I really should disallow reporting of them when */
1.3407 - /* the switch is off. Hence the "if" below. */
1.3408 - if (pswit[TYPO_SWITCH]) {
1.3409 - istypo = 0;
1.3410 - strcpy(testword, inword);
1.3411 - alower = 0;
1.3412 - for (i = 0; i < (signed int)strlen(testword); i++) { /* lowercase for testing */
1.3413 - if (testword[i] >= 'a' && testword[i] <= 'z') alower = 1;
1.3414 - if (alower && testword[i] >= 'A' && testword[i] <= 'Z') {
1.3415 - /* we have an uppercase mid-word. However, there are common cases: */
1.3416 - /* Mac and Mc like McGill */
1.3417 - /* French contractions like l'Abbe */
1.3418 - if ((i == 2 && testword[0] == 'm' && testword[1] == 'c') ||
1.3419 - (i == 3 && testword[0] == 'm' && testword[1] == 'a' && testword[2] == 'c') ||
1.3420 - (i > 0 && testword[i-1] == CHAR_SQUOTE))
1.3421 - ; /* do nothing! */
1.3422 -
1.3423 - else { /* V.97 - remove separate case of uppercase within word so that */
1.3424 - /* names like VanAllen fall into qword_index and get reported only once */
1.3425 - istypo = 1;
1.3426 - }
1.3427 - }
1.3428 - testword[i] = (char)tolower(testword[i]);
1.3429 - }
1.3430 -
1.3431 - /* check for certain unlikely two-letter combinations at word start and end */
1.3432 - /* V.0.97 - this replaces individual hardcoded checks in previous versions */
1.3433 - if (strlen(testword) > 1) {
1.3434 - for (i = 0; *nostart[i]; i++)
1.3435 - if (!strncmp(testword, nostart[i], 2))
1.3436 - istypo = 1;
1.3437 - for (i = 0; *noend[i]; i++)
1.3438 - if (!strncmp(testword + strlen(testword) -2, noend[i], 2))
1.3439 - istypo = 1;
1.3440 - }
1.3441 -
1.3442 -
1.3443 - /* ght is common, gbt never. Like that. */
1.3444 - if (strstr(testword, "cb")) istypo = 1;
1.3445 - if (strstr(testword, "gbt")) istypo = 1;
1.3446 - if (strstr(testword, "pbt")) istypo = 1;
1.3447 - if (strstr(testword, "tbs")) istypo = 1;
1.3448 - if (strstr(testword, "mrn")) istypo = 1;
1.3449 - if (strstr(testword, "ahle")) istypo = 1;
1.3450 - if (strstr(testword, "ihle")) istypo = 1;
1.3451 -
1.3452 - /* "TBE" does happen - like HEARTBEAT - but uncommon. */
1.3453 - /* Also "TBI" - frostbite, outbid - but uncommon. */
1.3454 - /* Similarly "ii" like Hawaii, or Pompeii, and in Roman numerals, */
1.3455 - /* but these are covered in V.20. "ii" is a common scanno. */
1.3456 - if (strstr(testword, "tbi")) istypo = 1;
1.3457 - if (strstr(testword, "tbe")) istypo = 1;
1.3458 - if (strstr(testword, "ii")) istypo = 1;
1.3459 -
1.3460 - /* check for no vowels or no consonants. */
1.3461 - /* If none, flag a typo */
1.3462 - if (!istypo && strlen(testword)>1) {
1.3463 - vowel = consonant = 0;
1.3464 - for (i = 0; testword[i]; i++)
1.3465 - if (testword[i] == 'y' || gcisdigit(testword[i])) { /* Yah, this is loose. */
1.3466 - vowel++;
1.3467 - consonant++;
1.3468 - }
1.3469 - else
1.3470 - if (strchr(vowels, testword[i])) vowel++;
1.3471 - else consonant++;
1.3472 - if (!vowel || !consonant) {
1.3473 - istypo = 1;
1.3474 - }
1.3475 - }
1.3476 -
1.3477 - /* now exclude the word from being reported if it's in */
1.3478 - /* the okword list */
1.3479 - for (i = 0; *okword[i]; i++)
1.3480 - if (!strcmp(testword, okword[i]))
1.3481 - istypo = 0;
1.3482 -
1.3483 - /* what looks like a typo may be a Roman numeral. Exclude these */
1.3484 - if (istypo)
1.3485 - if (isroman(testword))
1.3486 - istypo = 0;
1.3487 -
1.3488 - /* check the manual list of typos */
1.3489 - if (!istypo)
1.3490 - for (i = 0; *typo[i]; i++)
1.3491 - if (!strcmp(testword, typo[i]))
1.3492 - istypo = 1;
1.3493 -
1.3494 -
1.3495 - /* V.21 - check lowercase s and l - special cases */
1.3496 - /* V.98 - added "i" and "m" */
1.3497 - /* V.99 - added "j" often a semi-colon gone wrong */
1.3498 - /* - and "d" for a missing apostrophe - he d */
1.3499 - /* - and "n" for "in" */
1.3500 - if (!istypo && strlen(testword) == 1)
1.3501 - if (strchr("slmijdn", *inword))
1.3502 - istypo = 1;
1.3503 -
1.3504 -
1.3505 - if (istypo) {
1.3506 - isdup = 0;
1.3507 - if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
1.3508 - for (i = 0; i < qword_index; i++)
1.3509 - if (!strcmp(testword, qword[i])) {
1.3510 - isdup = 1;
1.3511 - ++dupcnt[i];
1.3512 - }
1.3513 - if (!isdup) {
1.3514 - if (qword_index < MAX_QWORD && strlen(testword) < MAX_QWORD_LENGTH) {
1.3515 - strcpy(qword[qword_index], testword);
1.3516 - qword_index++;
1.3517 - }
1.3518 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3519 - if (!pswit[OVERVIEW_SWITCH]) {
1.3520 - printf(" Line %ld column %d - Query word %s", linecnt, (int)(wordstart - aline) + 1, inword);
1.3521 - if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
1.3522 - printf(" - not reporting duplicates");
1.3523 - printf("\n");
1.3524 - }
1.3525 - else
1.3526 - cnt_word++;
1.3527 - }
1.3528 - }
1.3529 - } /* end of typo-checking */
1.3530 -
1.3531 - /* check the user's list of typos */
1.3532 - if (!istypo)
1.3533 - if (usertypo_count)
1.3534 - for (i = 0; i < usertypo_count; i++)
1.3535 - if (!strcmp(testword, usertypo[i])) {
1.3536 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3537 - if (!pswit[OVERVIEW_SWITCH])
1.3538 - printf(" Line %ld column %d - Query possible scanno %s\n", linecnt, (int)(wordstart - aline) + 2, inword);
1.3539 - }
1.3540 -
1.3541 -
1.3542 -
1.3543 - if (pswit[PARANOID_SWITCH] && warn_digit) { /* in paranoid mode, query all 0 and 1 standing alone - added warn_digit V.97*/
1.3544 - if (!strcmp(inword, "0") || !strcmp(inword, "1")) {
1.3545 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3546 + }
1.3547 + s++;
1.3548 + }
1.3549 + /*
1.3550 + * Now check special cases - start and end of line -
1.3551 + * for single and double quotes. Start is sometimes [sic]
1.3552 + * but better to query it anyway.
1.3553 + * While we're here, check for dash at end of line.
1.3554 + */
1.3555 + llen=strlen(aline);
1.3556 + if (llen>1)
1.3557 + {
1.3558 + if (aline[llen-1]==CHAR_DQUOTE || aline[llen-1]==CHAR_SQUOTE ||
1.3559 + aline[llen-1]==CHAR_OPEN_SQUOTE)
1.3560 + if (aline[llen-2]==CHAR_SPACE)
1.3561 + {
1.3562 + if (pswit[ECHO_SWITCH])
1.3563 + printf("\n%s\n",aline);
1.3564 if (!pswit[OVERVIEW_SWITCH])
1.3565 - printf(" Line %ld column %d - Query standalone %s\n", linecnt, (int)(wordstart - aline) + 2, inword);
1.3566 - else
1.3567 - cnt_word++;
1.3568 - }
1.3569 - }
1.3570 - }
1.3571 -
1.3572 - /* look for added or missing spaces around punctuation and quotes */
1.3573 - /* If there is a punctuation character like ! with no space on */
1.3574 - /* either side, suspect a missing!space. If there are spaces on */
1.3575 - /* both sides , assume a typo. If we see a double quote with no */
1.3576 - /* space or punctuation on either side of it, assume unspaced */
1.3577 - /* quotes "like"this. */
1.3578 - llen = strlen(aline);
1.3579 - for (i = 1; i < llen; i++) { /* for each character in the line after the first */
1.3580 - if (strchr(".?!,;:_", aline[i])) { /* if it's punctuation */
1.3581 - isacro = 0; /* we need to suppress warnings for acronyms like M.D. */
1.3582 - isellipsis = 0; /* we need to suppress warnings for ellipsis . . . */
1.3583 - if ( (gcisalpha(aline[i-1]) && gcisalpha(aline[i+1])) || /* if there are letters on both sides of it or ... */
1.3584 - (gcisalpha(aline[i+1]) && strchr("?!,;:", aline[i]))) { /* ...if it's strict punctuation followed by an alpha */
1.3585 - if (aline[i] == '.') {
1.3586 - if (i > 2)
1.3587 - if (aline[i-2] == '.') isacro = 1;
1.3588 - if (i + 2 < llen)
1.3589 - if (aline[i+2] == '.') isacro = 1;
1.3590 - }
1.3591 - if (!isacro) {
1.3592 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3593 - if (!pswit[OVERVIEW_SWITCH])
1.3594 - printf(" Line %ld column %d - Missing space?\n", linecnt, i+1);
1.3595 - else
1.3596 - cnt_punct++;
1.3597 - }
1.3598 - }
1.3599 - if (aline[i-1] == CHAR_SPACE && (aline[i+1] == CHAR_SPACE || aline[i+1] == 0)) { /* if there are spaces on both sides, or space before and end of line */
1.3600 - if (aline[i] == '.') {
1.3601 - if (i > 2)
1.3602 - if (aline[i-2] == '.') isellipsis = 1;
1.3603 - if (i + 2 < llen)
1.3604 - if (aline[i+2] == '.') isellipsis = 1;
1.3605 - }
1.3606 - if (!isemptyline && !isellipsis) {
1.3607 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3608 - if (!pswit[OVERVIEW_SWITCH])
1.3609 - printf(" Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
1.3610 - else
1.3611 - cnt_punct++;
1.3612 - }
1.3613 - }
1.3614 - }
1.3615 - }
1.3616 -
1.3617 - /* 0.98 -- split out the characters that CANNOT be preceded by space */
1.3618 - llen = strlen(aline);
1.3619 - for (i = 1; i < llen; i++) { /* for each character in the line after the first */
1.3620 - if (strchr("?!,;:", aline[i])) { /* if it's punctuation that _cannot_ have a space before it */
1.3621 - if (aline[i-1] == CHAR_SPACE && !isemptyline && aline[i+1] != CHAR_SPACE) { /* if aline[i+1) DOES == space, it was already reported just above */
1.3622 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3623 - if (!pswit[OVERVIEW_SWITCH])
1.3624 - printf(" Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
1.3625 + printf(" Line %ld column %d - Spaced quote?\n",
1.3626 + linecnt,llen);
1.3627 else
1.3628 cnt_punct++;
1.3629 - }
1.3630 - }
1.3631 - }
1.3632 -
1.3633 -
1.3634 - /* 0.99 -- special case " .X" where X is any alpha. */
1.3635 - /* This plugs a hole in the acronym code above. Inelegant, but maintainable. */
1.3636 - llen = strlen(aline);
1.3637 - for (i = 1; i < llen; i++) { /* for each character in the line after the first */
1.3638 - if (aline[i] == '.') { /* if it's a period */
1.3639 - if (aline[i-1] == CHAR_SPACE && gcisalpha(aline[i+1])) { /* if the period follows a space and is followed by a letter */
1.3640 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3641 + }
1.3642 + if ((aline[0]==CHAR_SQUOTE || aline[0]==CHAR_OPEN_SQUOTE) &&
1.3643 + aline[1]==CHAR_SPACE)
1.3644 + {
1.3645 + if (pswit[ECHO_SWITCH])
1.3646 + printf("\n%s\n",aline);
1.3647 + if (!pswit[OVERVIEW_SWITCH])
1.3648 + printf(" Line %ld column 1 - Spaced quote?\n",linecnt);
1.3649 + else
1.3650 + cnt_punct++;
1.3651 + }
1.3652 + /*
1.3653 + * Dash at end of line may well be legit - paranoid mode only
1.3654 + * and don't report em-dash at line-end.
1.3655 + */
1.3656 + if (pswit[PARANOID_SWITCH] && warn_hyphen)
1.3657 + {
1.3658 + for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)
1.3659 + ;
1.3660 + if (aline[i]=='-' && aline[i-1]!='-')
1.3661 + {
1.3662 + if (pswit[ECHO_SWITCH])
1.3663 + printf("\n%s\n",aline);
1.3664 if (!pswit[OVERVIEW_SWITCH])
1.3665 - printf(" Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
1.3666 - else
1.3667 - cnt_punct++;
1.3668 - }
1.3669 - }
1.3670 - }
1.3671 -
1.3672 -
1.3673 -
1.3674 -
1.3675 - /* v.21 breaking out the search for unspaced doublequotes */
1.3676 - /* This is not as efficient, but it's more maintainable */
1.3677 - /* V.97 added underscore to the list of characters not to query, */
1.3678 - /* since underscores are commonly used as italics indicators. */
1.3679 - /* V.98 Added slash as well, same reason. */
1.3680 - for (i = 1; i < llen; i++) { /* for each character in the line after the first */
1.3681 - if (aline[i] == CHAR_DQUOTE) {
1.3682 - if ((!strchr(" _-.'`,;:!/([{?}])", aline[i-1]) &&
1.3683 - !strchr(" _-.'`,;:!/([{?}])", aline[i+1]) &&
1.3684 - aline[i+1] != 0
1.3685 - || (!strchr(" _-([{'`", aline[i-1]) && gcisalpha(aline[i+1])))) {
1.3686 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3687 - if (!pswit[OVERVIEW_SWITCH])
1.3688 - printf(" Line %ld column %d - Unspaced quotes?\n", linecnt, i+1);
1.3689 - else
1.3690 - cnt_punct++;
1.3691 - }
1.3692 - }
1.3693 - }
1.3694 -
1.3695 -
1.3696 - /* v.98 check parity of quotes */
1.3697 - /* v.99 added !*(s+1) in some tests to catch "I am," he said, but I will not be soon". */
1.3698 - for (s = aline; *s; s++) {
1.3699 - if (*s == CHAR_DQUOTE) {
1.3700 - if (!(dquotepar = !dquotepar)) { /* parity even */
1.3701 - if (!strchr("_-.'`/,;:!?)]} ", *(s+1))) {
1.3702 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3703 - if (!pswit[OVERVIEW_SWITCH])
1.3704 - printf(" Line %ld column %d - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
1.3705 - else
1.3706 - cnt_punct++;
1.3707 - }
1.3708 - }
1.3709 - else { /* parity odd */
1.3710 - if (!gcisalpha(*(s+1)) && !isdigit(*(s+1)) && !strchr("_-/.'`([{$", *(s+1)) || !*(s+1)) {
1.3711 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3712 - if (!pswit[OVERVIEW_SWITCH])
1.3713 - printf(" Line %ld column %d - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
1.3714 - else
1.3715 - cnt_punct++;
1.3716 - }
1.3717 - }
1.3718 - }
1.3719 - }
1.3720 -
1.3721 - if (*aline == CHAR_DQUOTE) {
1.3722 - if (strchr(",;:!?)]} ", aline[1])) {
1.3723 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3724 - if (!pswit[OVERVIEW_SWITCH])
1.3725 - printf(" Line %ld column 1 - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
1.3726 - else
1.3727 - cnt_punct++;
1.3728 - }
1.3729 - }
1.3730 -
1.3731 - if (pswit[SQUOTE_SWITCH])
1.3732 - for (s = aline; *s; s++) {
1.3733 - if ((*s == CHAR_SQUOTE || *s == CHAR_OPEN_SQUOTE)
1.3734 - && ( s == aline || (s > aline && !gcisalpha(*(s-1))) || !gcisalpha(*(s+1)))) {
1.3735 - if (!(squotepar = !squotepar)) { /* parity even */
1.3736 - if (!strchr("_-.'`/\",;:!?)]} ", *(s+1))) {
1.3737 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3738 - if (!pswit[OVERVIEW_SWITCH])
1.3739 - printf(" Line %ld column %d - Wrongspaced singlequotes?\n", linecnt, (int)(s - aline)+1);
1.3740 - else
1.3741 - cnt_punct++;
1.3742 - }
1.3743 - }
1.3744 - else { /* parity odd */
1.3745 - if (!gcisalpha(*(s+1)) && !isdigit(*(s+1)) && !strchr("_-/\".'`", *(s+1)) || !*(s+1)) {
1.3746 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3747 - if (!pswit[OVERVIEW_SWITCH])
1.3748 - printf(" Line %ld column %d - Wrongspaced singlequotes?\n", linecnt, (int)(s - aline)+1);
1.3749 - else
1.3750 - cnt_punct++;
1.3751 - }
1.3752 - }
1.3753 - }
1.3754 - }
1.3755 -
1.3756 -
1.3757 - /* v.20 also look for double punctuation like ,. or ,, */
1.3758 - /* Thanks to DW for the suggestion! */
1.3759 - /* I'm putting this in a separate loop for clarity */
1.3760 - /* In books with references, ".," and ".;" are common */
1.3761 - /* e.g. "etc., etc.," and vol. 1.; vol 3.; */
1.3762 - /* OTOH, from my initial tests, there are also fairly */
1.3763 - /* common errors. What to do? Make these cases paranoid? */
1.3764 - /* V.21 ".," is the most common, so invented warn_dotcomma */
1.3765 - /* to suppress detailed reporting if it occurs often */
1.3766 - llen = strlen(aline);
1.3767 - for (i = 0; i < llen; i++) /* for each character in the line */
1.3768 - if (strchr(".?!,;:", aline[i]) /* if it's punctuation */
1.3769 - && (strchr(".?!,;:", aline[i+1]))
1.3770 - && aline[i] && aline[i+1]) /* followed by punctuation, it's a query, unless . . . */
1.3771 - if (
1.3772 - (aline[i] == aline[i+1]
1.3773 - && (aline[i] == '.' || aline[i] == '?' || aline[i] == '!'))
1.3774 - || (!warn_dotcomma && aline[i] == '.' && aline[i+1] == ',')
1.3775 - || (isFrench && !strncmp(aline+i, ",...", 4))
1.3776 - || (isFrench && !strncmp(aline+i, "...,", 4))
1.3777 - || (isFrench && !strncmp(aline+i, ";...", 4))
1.3778 - || (isFrench && !strncmp(aline+i, "...;", 4))
1.3779 - || (isFrench && !strncmp(aline+i, ":...", 4))
1.3780 - || (isFrench && !strncmp(aline+i, "...:", 4))
1.3781 - || (isFrench && !strncmp(aline+i, "!...", 4))
1.3782 - || (isFrench && !strncmp(aline+i, "...!", 4))
1.3783 - || (isFrench && !strncmp(aline+i, "?...", 4))
1.3784 - || (isFrench && !strncmp(aline+i, "...?", 4))
1.3785 - ) {
1.3786 - if ((isFrench && !strncmp(aline+i, ",...", 4)) /* could this BE any more awkward? */
1.3787 - || (isFrench && !strncmp(aline+i, "...,", 4))
1.3788 - || (isFrench && !strncmp(aline+i, ";...", 4))
1.3789 - || (isFrench && !strncmp(aline+i, "...;", 4))
1.3790 - || (isFrench && !strncmp(aline+i, ":...", 4))
1.3791 - || (isFrench && !strncmp(aline+i, "...:", 4))
1.3792 - || (isFrench && !strncmp(aline+i, "!...", 4))
1.3793 - || (isFrench && !strncmp(aline+i, "...!", 4))
1.3794 - || (isFrench && !strncmp(aline+i, "?...", 4))
1.3795 - || (isFrench && !strncmp(aline+i, "...?", 4)))
1.3796 - i +=4;
1.3797 - ; /* do nothing for .. !! and ?? which can be legit */
1.3798 - }
1.3799 - else {
1.3800 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3801 - if (!pswit[OVERVIEW_SWITCH])
1.3802 - printf(" Line %ld column %d - Double punctuation?\n", linecnt, i+1);
1.3803 - else
1.3804 - cnt_punct++;
1.3805 - }
1.3806 -
1.3807 - /* v.21 breaking out the search for spaced doublequotes */
1.3808 - /* This is not as efficient, but it's more maintainable */
1.3809 - s = aline;
1.3810 - while (strstr(s," \" ")) {
1.3811 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3812 - if (!pswit[OVERVIEW_SWITCH])
1.3813 - printf(" Line %ld column %d - Spaced doublequote?\n", linecnt, (int)(strstr(s," \" ")-aline+1));
1.3814 - else
1.3815 - cnt_punct++;
1.3816 - s = strstr(s," \" ") + 2;
1.3817 - }
1.3818 -
1.3819 - /* v.20 also look for spaced singlequotes ' and ` */
1.3820 - s = aline;
1.3821 - while (strstr(s," ' ")) {
1.3822 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3823 - if (!pswit[OVERVIEW_SWITCH])
1.3824 - printf(" Line %ld column %d - Spaced singlequote?\n", linecnt, (int)(strstr(s," ' ")-aline+1));
1.3825 - else
1.3826 - cnt_punct++;
1.3827 - s = strstr(s," ' ") + 2;
1.3828 - }
1.3829 -
1.3830 - s = aline;
1.3831 - while (strstr(s," ` ")) {
1.3832 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3833 - if (!pswit[OVERVIEW_SWITCH])
1.3834 - printf(" Line %ld column %d - Spaced singlequote?\n", linecnt, (int)(strstr(s," ` ")-aline+1));
1.3835 - else
1.3836 - cnt_punct++;
1.3837 - s = strstr(s," ` ") + 2;
1.3838 - }
1.3839 -
1.3840 - /* v.99 check special case of 'S instead of 's at end of word */
1.3841 - s = aline + 1;
1.3842 - while (*s) {
1.3843 - if (*s == CHAR_SQUOTE && *(s+1) == 'S' && *(s-1)>='a' && *(s-1)<='z') {
1.3844 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3845 + printf(" Line %ld column %d - "
1.3846 + "Hyphen at end of line?\n",linecnt,i);
1.3847 + }
1.3848 + }
1.3849 + }
1.3850 + /*
1.3851 + * Brackets are often unspaced, but shouldn't be surrounded by alpha.
1.3852 + * If so, suspect a scanno like "a]most".
1.3853 + */
1.3854 + llen=strlen(aline);
1.3855 + for (i=1;i<llen-1;i++)
1.3856 + {
1.3857 + /* for each bracket character in the line except 1st & last */
1.3858 + if (strchr("{[()]}",aline[i]) && gcisalpha(aline[i-1]) &&
1.3859 + gcisalpha(aline[i+1]))
1.3860 + {
1.3861 + if (pswit[ECHO_SWITCH])
1.3862 + printf("\n%s\n",aline);
1.3863 if (!pswit[OVERVIEW_SWITCH])
1.3864 - printf(" Line %ld column %d - Capital \"S\"?\n", linecnt, (int)(s-aline+2));
1.3865 + printf(" Line %ld column %d - Unspaced bracket?\n",
1.3866 + linecnt,i);
1.3867 else
1.3868 cnt_punct++;
1.3869 - }
1.3870 - s++;
1.3871 - }
1.3872 -
1.3873 -
1.3874 - /* v.21 Now check special cases - start and end of line - */
1.3875 - /* for single and double quotes. Start is sometimes [sic] */
1.3876 - /* but better to query it anyway. */
1.3877 - /* While I'm here, check for dash at end of line */
1.3878 - llen = strlen(aline);
1.3879 - if (llen > 1) {
1.3880 - if (aline[llen-1] == CHAR_DQUOTE ||
1.3881 - aline[llen-1] == CHAR_SQUOTE ||
1.3882 - aline[llen-1] == CHAR_OPEN_SQUOTE)
1.3883 - if (aline[llen-2] == CHAR_SPACE) {
1.3884 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3885 - if (!pswit[OVERVIEW_SWITCH])
1.3886 - printf(" Line %ld column %d - Spaced quote?\n", linecnt, llen);
1.3887 - else
1.3888 - cnt_punct++;
1.3889 - }
1.3890 -
1.3891 - /* V 0.98 removed aline[0] == CHAR_DQUOTE from the test below, since */
1.3892 - /* Wrongspaced quotes test also catches it for " */
1.3893 - if (aline[0] == CHAR_SQUOTE ||
1.3894 - aline[0] == CHAR_OPEN_SQUOTE)
1.3895 - if (aline[1] == CHAR_SPACE) {
1.3896 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3897 - if (!pswit[OVERVIEW_SWITCH])
1.3898 - printf(" Line %ld column 1 - Spaced quote?\n", linecnt);
1.3899 - else
1.3900 - cnt_punct++;
1.3901 - }
1.3902 - /* dash at end of line may well be legit - paranoid mode only */
1.3903 - /* and don't report em-dash at line-end */
1.3904 - if (pswit[PARANOID_SWITCH] && warn_hyphen) {
1.3905 - for (i = llen-1; i > 0 && (unsigned char)aline[i] <= CHAR_SPACE; i--);
1.3906 - if (aline[i] == '-' && aline[i-1] != '-') {
1.3907 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3908 - if (!pswit[OVERVIEW_SWITCH])
1.3909 - printf(" Line %ld column %d - Hyphen at end of line?\n", linecnt, i);
1.3910 - }
1.3911 - }
1.3912 - }
1.3913 -
1.3914 - /* v.21 also look for brackets surrounded by alpha */
1.3915 - /* Brackets are often unspaced, but shouldn't be surrounded by alpha. */
1.3916 - /* If so, suspect a scanno like "a]most" */
1.3917 - llen = strlen(aline);
1.3918 - for (i = 1; i < llen-1; i++) { /* for each character in the line except 1st & last*/
1.3919 - if (strchr("{[()]}", aline[i]) /* if it's a bracket */
1.3920 - && gcisalpha(aline[i-1]) && gcisalpha(aline[i+1])) {
1.3921 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3922 + }
1.3923 + }
1.3924 + llen=strlen(aline);
1.3925 + if (warn_endquote)
1.3926 + {
1.3927 + for (i=1;i<llen;i++)
1.3928 + {
1.3929 + /* for each character in the line except 1st */
1.3930 + if (aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))
1.3931 + {
1.3932 + if (pswit[ECHO_SWITCH])
1.3933 + printf("\n%s\n",aline);
1.3934 + if (!pswit[OVERVIEW_SWITCH])
1.3935 + printf(" Line %ld column %d - "
1.3936 + "endquote missing punctuation?\n",linecnt,i);
1.3937 + else
1.3938 + cnt_punct++;
1.3939 + }
1.3940 + }
1.3941 + }
1.3942 + /*
1.3943 + * Check for <HTML TAG>.
1.3944 + * If there is a < in the line, followed at some point
1.3945 + * by a > then we suspect HTML.
1.3946 + */
1.3947 + if (strstr(aline,"<") && strstr(aline,">"))
1.3948 + {
1.3949 + i=(signed int)(strstr(aline,">")-strstr(aline,"<")+1);
1.3950 + if (i>0)
1.3951 + {
1.3952 + strncpy(wrk,strstr(aline,"<"),i);
1.3953 + wrk[i]=0;
1.3954 + if (pswit[ECHO_SWITCH])
1.3955 + printf("\n%s\n",aline);
1.3956 if (!pswit[OVERVIEW_SWITCH])
1.3957 - printf(" Line %ld column %d - Unspaced bracket?\n", linecnt, i);
1.3958 - else
1.3959 - cnt_punct++;
1.3960 - }
1.3961 - }
1.3962 - /* The "Cinderella" case, back in again! :-S Give it another shot */
1.3963 - if (warn_endquote) {
1.3964 - llen = strlen(aline);
1.3965 - for (i = 1; i < llen; i++) { /* for each character in the line except 1st */
1.3966 - if (aline[i] == CHAR_DQUOTE)
1.3967 - if (isalpha(aline[i-1])) {
1.3968 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3969 - if (!pswit[OVERVIEW_SWITCH])
1.3970 - printf(" Line %ld column %d - endquote missing punctuation?\n", linecnt, i);
1.3971 - else
1.3972 - cnt_punct++;
1.3973 - }
1.3974 - }
1.3975 - }
1.3976 -
1.3977 - llen = strlen(aline);
1.3978 -
1.3979 - /* Check for <HTML TAG> */
1.3980 - /* If there is a < in the line, followed at some point */
1.3981 - /* by a > then we suspect HTML */
1.3982 - if (strstr(aline, "<") && strstr(aline, ">")) {
1.3983 - i = (signed int) (strstr(aline, ">") - strstr(aline, "<") + 1);
1.3984 - if (i > 0) {
1.3985 - strncpy(wrk, strstr(aline, "<"), i);
1.3986 - wrk[i] = 0;
1.3987 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.3988 - if (!pswit[OVERVIEW_SWITCH])
1.3989 - printf(" Line %ld column %d - HTML Tag? %s \n", linecnt, (int)(strstr(aline, "<") - aline) + 1, wrk);
1.3990 + printf(" Line %ld column %d - HTML Tag? %s \n",
1.3991 + linecnt,(int)(strstr(aline,"<")-aline)+1,wrk);
1.3992 else
1.3993 cnt_html++;
1.3994 - }
1.3995 - }
1.3996 -
1.3997 - /* Check for &symbol; HTML */
1.3998 - /* If there is a & in the line, followed at */
1.3999 - /* some point by a ; then we suspect HTML */
1.4000 - if (strstr(aline, "&") && strstr(aline, ";")) {
1.4001 - i = (int)(strstr(aline, ";") - strstr(aline, "&") + 1);
1.4002 - for (s = strstr(aline, "&"); s < strstr(aline, ";"); s++)
1.4003 - if (*s == CHAR_SPACE) i = 0; /* 0.99 don't report "Jones & Son;" */
1.4004 - if (i > 0) {
1.4005 - strncpy(wrk, strstr(aline,"&"), i);
1.4006 - wrk[i] = 0;
1.4007 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
1.4008 + }
1.4009 + }
1.4010 + /*
1.4011 + * Check for &symbol; HTML.
1.4012 + * If there is a & in the line, followed at
1.4013 + * some point by a ; then we suspect HTML.
1.4014 + */
1.4015 + if (strstr(aline,"&") && strstr(aline,";"))
1.4016 + {
1.4017 + i=(int)(strstr(aline,";")-strstr(aline,"&")+1);
1.4018 + for (s=strstr(aline,"&");s<strstr(aline,";");s++)
1.4019 + if (*s==CHAR_SPACE)
1.4020 + i=0; /* Don't report "Jones & Son;" */
1.4021 + if (i>0)
1.4022 + {
1.4023 + strncpy(wrk,strstr(aline,"&"),i);
1.4024 + wrk[i]=0;
1.4025 + if (pswit[ECHO_SWITCH])
1.4026 + printf("\n%s\n",aline);
1.4027 if (!pswit[OVERVIEW_SWITCH])
1.4028 - printf(" Line %ld column %d - HTML symbol? %s \n", linecnt, (int)(strstr(aline, "&") - aline) + 1, wrk);
1.4029 + printf(" Line %ld column %d - HTML symbol? %s \n",
1.4030 + linecnt,(int)(strstr(aline,"&")-aline)+1,wrk);
1.4031 else
1.4032 cnt_html++;
1.4033 - }
1.4034 - }
1.4035 -
1.4036 - /* At end of paragraph, check for mismatched quotes. */
1.4037 - /* We don't want to report an error immediately, since it is a */
1.4038 - /* common convention to omit the quotes at end of paragraph if */
1.4039 - /* the next paragraph is a continuation of the same speaker. */
1.4040 - /* Where this is the case, the next para should begin with a */
1.4041 - /* quote, so we store the warning message and only display it */
1.4042 - /* at the top of the next iteration if the new para doesn't */
1.4043 - /* start with a quote. */
1.4044 - /* The -p switch overrides this default, and warns of unclosed */
1.4045 - /* quotes on _every_ paragraph, whether the next begins with a */
1.4046 - /* quote or not. */
1.4047 - /* Version .16 - only report mismatched single quotes if */
1.4048 - /* an open_single_quotes was found. */
1.4049 -
1.4050 - if (isemptyline) { /* end of para - add up the totals */
1.4051 - if (quot % 2)
1.4052 - sprintf(dquote_err, " Line %ld - Mismatched quotes\n", linecnt);
1.4053 - if (pswit[SQUOTE_SWITCH] && open_single_quote && (open_single_quote != close_single_quote) )
1.4054 - sprintf(squote_err," Line %ld - Mismatched singlequotes?\n", linecnt);
1.4055 - if (pswit[SQUOTE_SWITCH] && open_single_quote
1.4056 - && (open_single_quote != close_single_quote)
1.4057 - && (open_single_quote != close_single_quote +1) )
1.4058 - squot = 1; /* flag it to be noted regardless of the first char of the next para */
1.4059 + }
1.4060 + }
1.4061 + /*
1.4062 + * At end of paragraph, check for mismatched quotes.
1.4063 + * We don't want to report an error immediately, since it is a
1.4064 + * common convention to omit the quotes at end of paragraph if
1.4065 + * the next paragraph is a continuation of the same speaker.
1.4066 + * Where this is the case, the next para should begin with a
1.4067 + * quote, so we store the warning message and only display it
1.4068 + * at the top of the next iteration if the new para doesn't
1.4069 + * start with a quote.
1.4070 + * The -p switch overrides this default, and warns of unclosed
1.4071 + * quotes on _every_ paragraph, whether the next begins with a
1.4072 + * quote or not.
1.4073 + */
1.4074 + if (isemptyline)
1.4075 + {
1.4076 + /* end of para - add up the totals */
1.4077 + if (quot%2)
1.4078 + sprintf(dquote_err," Line %ld - Mismatched quotes\n",
1.4079 + linecnt);
1.4080 + if (pswit[SQUOTE_SWITCH] && open_single_quote &&
1.4081 + open_single_quote!=close_single_quote)
1.4082 + sprintf(squote_err," Line %ld - Mismatched singlequotes?\n",
1.4083 + linecnt);
1.4084 + if (pswit[SQUOTE_SWITCH] && open_single_quote &&
1.4085 + open_single_quote!=close_single_quote &&
1.4086 + open_single_quote!=close_single_quote+1)
1.4087 + /*
1.4088 + * Flag it to be noted regardless of the
1.4089 + * first char of the next para.
1.4090 + */
1.4091 + squot=1;
1.4092 if (r_brack)
1.4093 - sprintf(rbrack_err, " Line %ld - Mismatched round brackets?\n", linecnt);
1.4094 + sprintf(rbrack_err," Line %ld - "
1.4095 + "Mismatched round brackets?\n",linecnt);
1.4096 if (s_brack)
1.4097 - sprintf(sbrack_err, " Line %ld - Mismatched square brackets?\n", linecnt);
1.4098 + sprintf(sbrack_err," Line %ld - "
1.4099 + "Mismatched square brackets?\n",linecnt);
1.4100 if (c_brack)
1.4101 - sprintf(cbrack_err, " Line %ld - Mismatched curly brackets?\n", linecnt);
1.4102 - if (c_unders % 2)
1.4103 - sprintf(unders_err, " Line %ld - Mismatched underscores?\n", linecnt);
1.4104 - quot = s_brack = c_brack = r_brack = c_unders =
1.4105 - open_single_quote = close_single_quote = 0;
1.4106 - isnewpara = 1; /* let the next iteration know that it's starting a new para */
1.4107 - }
1.4108 -
1.4109 - /* V.21 _ALSO_ at end of paragraph, check for omitted punctuation. */
1.4110 - /* by working back through prevline. DW. */
1.4111 - /* Hmmm. Need to check this only for "normal" paras. */
1.4112 - /* So what is a "normal" para? ouch! */
1.4113 - /* Not normal if one-liner (chapter headings, etc.) */
1.4114 - /* Not normal if doesn't contain at least one locase letter */
1.4115 - /* Not normal if starts with space */
1.4116 -
1.4117 - /* 0.99 tighten up on para end checks. Disallow comma and */
1.4118 - /* semi-colon. Check for legit para end before quotes. */
1.4119 - if (isemptyline) { /* end of para */
1.4120 - for (s = prevline, i = 0; *s && !i; s++)
1.4121 + sprintf(cbrack_err," Line %ld - "
1.4122 + "Mismatched curly brackets?\n",linecnt);
1.4123 + if (c_unders%2)
1.4124 + sprintf(unders_err," Line %ld - Mismatched underscores?\n",
1.4125 + linecnt);
1.4126 + quot=s_brack=c_brack=r_brack=c_unders=open_single_quote=
1.4127 + close_single_quote=0;
1.4128 + /* let the next iteration know that it's starting a new para */
1.4129 + isnewpara=1;
1.4130 + }
1.4131 + /*
1.4132 + * Check for omitted punctuation at end of paragraph by working back
1.4133 + * through prevline. DW.
1.4134 + * Need to check this only for "normal" paras.
1.4135 + * So what is a "normal" para?
1.4136 + * Not normal if one-liner (chapter headings, etc.)
1.4137 + * Not normal if doesn't contain at least one locase letter
1.4138 + * Not normal if starts with space
1.4139 + */
1.4140 + if (isemptyline)
1.4141 + {
1.4142 + /* end of para */
1.4143 + for (s=prevline,i=0;*s && !i;s++)
1.4144 if (gcisletter(*s))
1.4145 - i = 1; /* use i to indicate the presence of a letter on the line */
1.4146 - /* This next "if" is a problem. */
1.4147 - /* If I say "start_para_line <= linecnt - 1", that includes one-line */
1.4148 - /* "paragraphs" like chapter heads. Lotsa false positives. */
1.4149 - /* If I say "start_para_line < linecnt - 1" it doesn't, but then it */
1.4150 - /* misses genuine one-line paragraphs. */
1.4151 - /* So what do I do? */
1.4152 - if (i
1.4153 - && lastblen > 2
1.4154 - && start_para_line < linecnt - 1
1.4155 - && *prevline > CHAR_SPACE
1.4156 - ) {
1.4157 - for (i = strlen(prevline)-1; (prevline[i] == CHAR_DQUOTE || prevline[i] == CHAR_SQUOTE) && prevline[i] > CHAR_SPACE && i > 0; i--);
1.4158 - for ( ; i > 0; i--) {
1.4159 - if (gcisalpha(prevline[i])) {
1.4160 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", prevline);
1.4161 + /* use i to indicate the presence of a letter on the line */
1.4162 + i=1;
1.4163 + /*
1.4164 + * This next "if" is a problem.
1.4165 + * If we say "start_para_line <= linecnt - 1", that includes
1.4166 + * one-line "paragraphs" like chapter heads. Lotsa false positives.
1.4167 + * If we say "start_para_line < linecnt - 1" it doesn't, but then it
1.4168 + * misses genuine one-line paragraphs.
1.4169 + */
1.4170 + if (i && lastblen>2 && start_para_line<linecnt-1 &&
1.4171 + *prevline>CHAR_SPACE)
1.4172 + {
1.4173 + for (i=strlen(prevline)-1;
1.4174 + (prevline[i]==CHAR_DQUOTE || prevline[i]==CHAR_SQUOTE) &&
1.4175 + prevline[i]>CHAR_SPACE && i>0;
1.4176 + i--)
1.4177 + ;
1.4178 + for (;i>0;i--)
1.4179 + {
1.4180 + if (gcisalpha(prevline[i]))
1.4181 + {
1.4182 + if (pswit[ECHO_SWITCH])
1.4183 + printf("\n%s\n",prevline);
1.4184 if (!pswit[OVERVIEW_SWITCH])
1.4185 - printf(" Line %ld column %d - No punctuation at para end?\n", linecnt-1, strlen(prevline));
1.4186 + printf(" Line %ld column %d - "
1.4187 + "No punctuation at para end?\n",
1.4188 + linecnt-1,strlen(prevline));
1.4189 else
1.4190 cnt_punct++;
1.4191 break;
1.4192 - }
1.4193 - if (strchr("-.:!([{?}])", prevline[i]))
1.4194 + }
1.4195 + if (strchr("-.:!([{?}])",prevline[i]))
1.4196 break;
1.4197 - }
1.4198 - }
1.4199 - }
1.4200 - strcpy(prevline, aline);
1.4201 + }
1.4202 + }
1.4203 + }
1.4204 + strcpy(prevline,aline);
1.4205 }
1.4206 - fclose (infile);
1.4207 + fclose(infile);
1.4208 if (!pswit[OVERVIEW_SWITCH])
1.4209 - for (i = 0; i < MAX_QWORD; i++)
1.4210 + for (i=0;i<MAX_QWORD;i++)
1.4211 if (dupcnt[i])
1.4212 - printf("\nNote: Queried word %s was duplicated %d time%s\n", qword[i], dupcnt[i], "s");
1.4213 + printf("\nNote: Queried word %s was duplicated %d time%s\n",
1.4214 + qword[i],dupcnt[i],"s");
1.4215 }
1.4216
1.4217 -
1.4218 -
1.4219 -/* flgets - get one line from the input stream, checking for */
1.4220 -/* the existence of exactly one CR/LF line-end per line. */
1.4221 -/* Returns a pointer to the line. */
1.4222 -
1.4223 -char *flgets(char *theline, int maxlen, FILE *thefile, long lcnt)
1.4224 +/*
1.4225 + * flgets:
1.4226 + *
1.4227 + * Get one line from the input stream, checking for
1.4228 + * the existence of exactly one CR/LF line-end per line.
1.4229 + *
1.4230 + * Returns: a pointer to the line.
1.4231 + */
1.4232 +char *flgets(char *theline,int maxlen,FILE *thefile,long lcnt)
1.4233 {
1.4234 char c;
1.4235 - int len, isCR, cint;
1.4236 -
1.4237 - *theline = 0;
1.4238 - len = isCR = 0;
1.4239 - c = cint = fgetc(thefile);
1.4240 - do {
1.4241 - if (cint == EOF)
1.4242 - return (NULL);
1.4243 - if (c == 10) /* either way, it's end of line */
1.4244 + int len,isCR,cint;
1.4245 + *theline=0;
1.4246 + len=isCR=0;
1.4247 + c=cint=fgetc(thefile);
1.4248 + do
1.4249 + {
1.4250 + if (cint==EOF)
1.4251 + return NULL;
1.4252 + /* either way, it's end of line */
1.4253 + if (c==10)
1.4254 + {
1.4255 if (isCR)
1.4256 break;
1.4257 - else { /* Error - a LF without a preceding CR */
1.4258 - if (pswit[LINE_END_SWITCH]) {
1.4259 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
1.4260 + else
1.4261 + {
1.4262 + /* Error - a LF without a preceding CR */
1.4263 + if (pswit[LINE_END_SWITCH])
1.4264 + {
1.4265 + if (pswit[ECHO_SWITCH])
1.4266 + printf("\n%s\n",theline);
1.4267 if (!pswit[OVERVIEW_SWITCH])
1.4268 - printf(" Line %ld - No CR?\n", lcnt);
1.4269 + printf(" Line %ld - No CR?\n",lcnt);
1.4270 else
1.4271 cnt_lineend++;
1.4272 - }
1.4273 + }
1.4274 break;
1.4275 - }
1.4276 - if (c == 13) {
1.4277 - if (isCR) { /* Error - two successive CRs */
1.4278 - if (pswit[LINE_END_SWITCH]) {
1.4279 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
1.4280 + }
1.4281 + }
1.4282 + if (c==13)
1.4283 + {
1.4284 + if (isCR)
1.4285 + {
1.4286 + /* Error - two successive CRs */
1.4287 + if (pswit[LINE_END_SWITCH])
1.4288 + {
1.4289 + if (pswit[ECHO_SWITCH])
1.4290 + printf("\n%s\n",theline);
1.4291 if (!pswit[OVERVIEW_SWITCH])
1.4292 - printf(" Line %ld - Two successive CRs?\n", lcnt);
1.4293 + printf(" Line %ld - Two successive CRs?\n",lcnt);
1.4294 else
1.4295 cnt_lineend++;
1.4296 - }
1.4297 - }
1.4298 - isCR = 1;
1.4299 - }
1.4300 - else {
1.4301 - if (pswit[LINE_END_SWITCH] && isCR) {
1.4302 - if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
1.4303 + }
1.4304 + }
1.4305 + isCR=1;
1.4306 + }
1.4307 + else
1.4308 + {
1.4309 + if (pswit[LINE_END_SWITCH] && isCR)
1.4310 + {
1.4311 + if (pswit[ECHO_SWITCH])
1.4312 + printf("\n%s\n",theline);
1.4313 if (!pswit[OVERVIEW_SWITCH])
1.4314 - printf(" Line %ld column %d - CR without LF?\n", lcnt, len+1);
1.4315 + printf(" Line %ld column %d - CR without LF?\n",
1.4316 + lcnt,len+1);
1.4317 else
1.4318 cnt_lineend++;
1.4319 - }
1.4320 - theline[len] = c;
1.4321 - len++;
1.4322 - theline[len] = 0;
1.4323 - isCR = 0;
1.4324 - }
1.4325 - c = cint = fgetc(thefile);
1.4326 - } while(len < maxlen);
1.4327 + }
1.4328 + theline[len]=c;
1.4329 + len++;
1.4330 + theline[len]=0;
1.4331 + isCR=0;
1.4332 + }
1.4333 + c=cint=fgetc(thefile);
1.4334 + } while(len<maxlen);
1.4335 if (pswit[MARKUP_SWITCH])
1.4336 postprocess_for_HTML(theline);
1.4337 if (pswit[DP_SWITCH])
1.4338 postprocess_for_DP(theline);
1.4339 - return(theline);
1.4340 + return theline;
1.4341 }
1.4342
1.4343 -
1.4344 -
1.4345 -
1.4346 -/* mixdigit - takes a "word" as a parameter, and checks whether it */
1.4347 -/* contains a mixture of alpha and digits. Generally, this is an */
1.4348 -/* error, but may not be for cases like 4th or L5 12s. 3d. */
1.4349 -/* Returns 0 if no error found, 1 if error. */
1.4350 -
1.4351 -int mixdigit(char *checkword) /* check for digits like 1 or 0 in words */
1.4352 +/*
1.4353 + * mixdigit:
1.4354 + *
1.4355 + * Takes a "word" as a parameter, and checks whether it
1.4356 + * contains a mixture of alpha and digits. Generally, this is an
1.4357 + * error, but may not be for cases like 4th or L5 12s. 3d.
1.4358 + *
1.4359 + * Returns: 0 if no error found, 1 if error.
1.4360 + */
1.4361 +int mixdigit(char *checkword)
1.4362 {
1.4363 - int wehaveadigit, wehavealetter, firstdigits, query, wl;
1.4364 + int wehaveadigit,wehavealetter,firstdigits,query,wl;
1.4365 char *s;
1.4366 -
1.4367 -
1.4368 - wehaveadigit = wehavealetter = query = 0;
1.4369 - for (s = checkword; *s; s++)
1.4370 + wehaveadigit=wehavealetter=query=0;
1.4371 + for (s=checkword;*s;s++)
1.4372 if (gcisalpha(*s))
1.4373 - wehavealetter = 1;
1.4374 + wehavealetter=1;
1.4375 else
1.4376 if (gcisdigit(*s))
1.4377 - wehaveadigit = 1;
1.4378 - if (wehaveadigit && wehavealetter) { /* Now exclude common legit cases, like "21st" and "12l. 3s. 11d." */
1.4379 - query = 1;
1.4380 - wl = strlen(checkword);
1.4381 - for (firstdigits = 0; gcisdigit(checkword[firstdigits]); firstdigits++)
1.4382 + wehaveadigit=1;
1.4383 + if (wehaveadigit && wehavealetter)
1.4384 + {
1.4385 + /* Now exclude common legit cases, like "21st" and "12l. 3s. 11d." */
1.4386 + query=1;
1.4387 + wl=strlen(checkword);
1.4388 + for (firstdigits=0;gcisdigit(checkword[firstdigits]);firstdigits++)
1.4389 ;
1.4390 /* digits, ending in st, rd, nd, th of either case */
1.4391 - /* 0.99 donovan points out an error below. Turns out */
1.4392 - /* I was using matchword like strcmp when the */
1.4393 - /* return values are different! Duh. */
1.4394 - if (firstdigits + 2 == wl &&
1.4395 - (matchword(checkword + wl - 2, "st")
1.4396 - || matchword(checkword + wl - 2, "rd")
1.4397 - || matchword(checkword + wl - 2, "nd")
1.4398 - || matchword(checkword + wl - 2, "th"))
1.4399 - )
1.4400 - query = 0;
1.4401 - if (firstdigits + 3 == wl &&
1.4402 - (matchword(checkword + wl - 3, "sts")
1.4403 - || matchword(checkword + wl - 3, "rds")
1.4404 - || matchword(checkword + wl - 3, "nds")
1.4405 - || matchword(checkword + wl - 3, "ths"))
1.4406 - )
1.4407 - query = 0;
1.4408 - if (firstdigits + 3 == wl &&
1.4409 - (matchword(checkword + wl - 4, "stly")
1.4410 - || matchword(checkword + wl - 4, "rdly")
1.4411 - || matchword(checkword + wl - 4, "ndly")
1.4412 - || matchword(checkword + wl - 4, "thly"))
1.4413 - )
1.4414 - query = 0;
1.4415 -
1.4416 + if (firstdigits+2==wl && (matchword(checkword+wl-2,"st") ||
1.4417 + matchword(checkword+wl-2,"rd") || matchword(checkword+wl-2,"nd") ||
1.4418 + matchword(checkword+wl-2,"th")))
1.4419 + query=0;
1.4420 + if (firstdigits+3==wl && (matchword(checkword+wl-3,"sts") ||
1.4421 + matchword(checkword+wl-3,"rds") || matchword(checkword+wl-3,"nds") ||
1.4422 + matchword(checkword+wl-3,"ths")))
1.4423 + query=0;
1.4424 + if (firstdigits+3==wl && (matchword(checkword+wl-4,"stly") ||
1.4425 + matchword(checkword+wl-4,"rdly") ||
1.4426 + matchword(checkword+wl-4,"ndly") || matchword(checkword+wl-4,"thly")))
1.4427 + query=0;
1.4428 /* digits, ending in l, L, s or d */
1.4429 - if (firstdigits + 1 == wl &&
1.4430 - (checkword[wl-1] == 'l'
1.4431 - || checkword[wl-1] == 'L'
1.4432 - || checkword[wl-1] == 's'
1.4433 - || checkword[wl-1] == 'd'))
1.4434 - query = 0;
1.4435 - /* L at the start of a number, representing Britsh pounds, like L500 */
1.4436 - /* This is cute. We know the current word is mixeddigit. If the first */
1.4437 - /* letter is L, there must be at least one digit following. If both */
1.4438 - /* digits and letters follow, we have a genuine error, else we have a */
1.4439 - /* capital L followed by digits, and we accept that as a non-error. */
1.4440 - if (checkword[0] == 'L')
1.4441 - if (!mixdigit(checkword+1))
1.4442 - query = 0;
1.4443 - }
1.4444 - return (query);
1.4445 + if (firstdigits+1==wl && (checkword[wl-1]=='l' ||
1.4446 + checkword[wl-1]=='L' || checkword[wl-1]=='s' || checkword[wl-1]=='d'))
1.4447 + query=0;
1.4448 + /*
1.4449 + * L at the start of a number, representing British pounds, like L500.
1.4450 + * This is cute. We know the current word is mixeddigit. If the first
1.4451 + * letter is L, there must be at least one digit following. If both
1.4452 + * digits and letters follow, we have a genuine error, else we have a
1.4453 + * capital L followed by digits, and we accept that as a non-error.
1.4454 + */
1.4455 + if (checkword[0]=='L' && !mixdigit(checkword+1))
1.4456 + query=0;
1.4457 + }
1.4458 + return query;
1.4459 }
1.4460
1.4461 -
1.4462 -
1.4463 -
1.4464 -/* getaword - extracts the first/next "word" from the line, and puts */
1.4465 -/* it into "thisword". A word is defined as one English word unit */
1.4466 -/* -- or at least that's what I'm trying for. */
1.4467 -/* Returns a pointer to the position in the line where we will start */
1.4468 -/* looking for the next word. */
1.4469 -
1.4470 -char *getaword(char *fromline, char *thisword)
1.4471 +/*
1.4472 + * getaword:
1.4473 + *
1.4474 + * Extracts the first/next "word" from the line, and puts
1.4475 + * it into "thisword". A word is defined as one English word unit--or
1.4476 + * at least that's the aim.
1.4477 + *
1.4478 + * Returns: a pointer to the position in the line where we will start
1.4479 + * looking for the next word.
1.4480 + */
1.4481 +char *getaword(char *fromline,char *thisword)
1.4482 {
1.4483 - int i, wordlen;
1.4484 + int i,wordlen;
1.4485 char *s;
1.4486 -
1.4487 - wordlen = 0;
1.4488 - for ( ; !gcisdigit(*fromline) && !gcisalpha(*fromline) && *fromline ; fromline++ );
1.4489 -
1.4490 - /* V .20 */
1.4491 - /* add a look-ahead to handle exceptions for numbers like 1,000 and 1.35. */
1.4492 - /* Especially yucky is the case of L1,000 */
1.4493 - /* I hate this, and I see other ways, but I don't see that any is _better_.*/
1.4494 - /* This section looks for a pattern of characters including a digit */
1.4495 - /* followed by a comma or period followed by one or more digits. */
1.4496 - /* If found, it returns this whole pattern as a word; otherwise we discard */
1.4497 - /* the results and resume our normal programming. */
1.4498 - s = fromline;
1.4499 - for ( ; (gcisdigit(*s) || gcisalpha(*s) || *s == ',' || *s == '.') && wordlen < MAXWORDLEN ; s++ ) {
1.4500 - thisword[wordlen] = *s;
1.4501 + wordlen=0;
1.4502 + for (;!gcisdigit(*fromline) && !gcisalpha(*fromline) && *fromline;
1.4503 + fromline++)
1.4504 + ;
1.4505 + /*
1.4506 + * Use a look-ahead to handle exceptions for numbers like 1,000 and 1.35.
1.4507 + * Especially yucky is the case of L1,000
1.4508 + * This section looks for a pattern of characters including a digit
1.4509 + * followed by a comma or period followed by one or more digits.
1.4510 + * If found, it returns this whole pattern as a word; otherwise we discard
1.4511 + * the results and resume our normal programming.
1.4512 + */
1.4513 + s=fromline;
1.4514 + for (;(gcisdigit(*s) || gcisalpha(*s) || *s==',' || *s=='.') &&
1.4515 + wordlen<MAXWORDLEN;s++)
1.4516 + {
1.4517 + thisword[wordlen]=*s;
1.4518 wordlen++;
1.4519 - }
1.4520 - thisword[wordlen] = 0;
1.4521 - for (i = 1; i < wordlen -1; i++) {
1.4522 - if (thisword[i] == '.' || thisword[i] == ',') {
1.4523 - if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i-1])) { /* we have one of the damned things */
1.4524 - fromline = s;
1.4525 - return(fromline);
1.4526 - }
1.4527 - }
1.4528 - }
1.4529 -
1.4530 + }
1.4531 + thisword[wordlen]=0;
1.4532 + for (i=1;i<wordlen-1;i++)
1.4533 + {
1.4534 + if (thisword[i]=='.' || thisword[i]==',')
1.4535 + {
1.4536 + if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i-1]))
1.4537 + {
1.4538 + fromline=s;
1.4539 + return fromline;
1.4540 + }
1.4541 + }
1.4542 + }
1.4543 /* we didn't find a punctuated number - do the regular getword thing */
1.4544 - wordlen = 0;
1.4545 - for ( ; (gcisdigit(*fromline) || gcisalpha(*fromline) || *fromline == '\'') && wordlen < MAXWORDLEN ; fromline++ ) {
1.4546 - thisword[wordlen] = *fromline;
1.4547 + wordlen=0;
1.4548 + for (;(gcisdigit(*fromline) || gcisalpha(*fromline) || *fromline=='\'') &&
1.4549 + wordlen<MAXWORDLEN;fromline++)
1.4550 + {
1.4551 + thisword[wordlen]=*fromline;
1.4552 wordlen++;
1.4553 - }
1.4554 - thisword[wordlen] = 0;
1.4555 - return(fromline);
1.4556 + }
1.4557 + thisword[wordlen]=0;
1.4558 + return fromline;
1.4559 }
1.4560
1.4561 -
1.4562 -
1.4563 -
1.4564 -
1.4565 -/* matchword - just a case-insensitive string matcher */
1.4566 -/* yes, I know this is not efficient. I'll worry about */
1.4567 -/* that when I have a clear idea where I'm going with it.*/
1.4568 -
1.4569 -int matchword(char *checkfor, char *thisword)
1.4570 +/*
1.4571 + * matchword:
1.4572 + *
1.4573 + * A case-insensitive string matcher.
1.4574 + */
1.4575 +int matchword(char *checkfor,char *thisword)
1.4576 {
1.4577 - unsigned int ismatch, i;
1.4578 -
1.4579 - if (strlen(checkfor) != strlen(thisword)) return(0);
1.4580 -
1.4581 - ismatch = 1; /* assume a match until we find a difference */
1.4582 - for (i = 0; i <strlen(checkfor); i++)
1.4583 - if (toupper(checkfor[i]) != toupper(thisword[i]))
1.4584 - ismatch = 0;
1.4585 - return (ismatch);
1.4586 + unsigned int ismatch,i;
1.4587 + if (strlen(checkfor)!=strlen(thisword))
1.4588 + return 0;
1.4589 + ismatch=1; /* assume a match until we find a difference */
1.4590 + for (i=0;i<strlen(checkfor);i++)
1.4591 + if (toupper(checkfor[i])!=toupper(thisword[i]))
1.4592 + ismatch=0;
1.4593 + return ismatch;
1.4594 }
1.4595
1.4596 -
1.4597 -
1.4598 -
1.4599 -
1.4600 -/* lowerit - lowercase the line. Yes, strlwr does the same job, */
1.4601 -/* but not on all platforms, and I'm a bit paranoid about what */
1.4602 -/* some implementations of tolower might do to hi-bit characters,*/
1.4603 -/* which shouldn't matter, but better safe than sorry. */
1.4604 +/*
1.4605 + * lowerit:
1.4606 + *
1.4607 + * Lowercase the line.
1.4608 + */
1.4609
1.4610 void lowerit(char *theline)
1.4611 {
1.4612 - for ( ; *theline; theline++)
1.4613 - if (*theline >='A' && *theline <='Z')
1.4614 - *theline += 32;
1.4615 + for (;*theline;theline++)
1.4616 + if (*theline>='A' && *theline<='Z')
1.4617 + *theline+=32;
1.4618 }
1.4619
1.4620 -
1.4621 -/* Is this word a Roman Numeral? */
1.4622 -/* v 0.99 improved to be better. It still doesn't actually */
1.4623 -/* validate that the number is a valid Roman Numeral -- for example */
1.4624 -/* it will pass MXXXXXXXXXX as a valid Roman Numeral, but that's not*/
1.4625 -/* what we're here to do. If it passes this, it LOOKS like a Roman */
1.4626 -/* numeral. Anyway, the actual Romans were pretty tolerant of bad */
1.4627 -/* arithmetic, or expressions thereof, except when it came to taxes.*/
1.4628 -/* Allow any number of M, an optional D, an optional CM or CD, */
1.4629 -/* any number of optional Cs, an optional XL or an optional XC, an */
1.4630 -/* optional IX or IV, an optional V and any number of optional Is. */
1.4631 -/* Good enough for jazz chords. */
1.4632 -
1.4633 +/*
1.4634 + * isroman:
1.4635 + *
1.4636 + * Is this word a Roman Numeral?
1.4637 + *
1.4638 + * It doesn't actually validate that the number is a valid Roman Numeral--for
1.4639 + * example it will pass MXXXXXXXXXX as a valid Roman Numeral, but that's not
1.4640 + * what we're here to do. If it passes this, it LOOKS like a Roman numeral.
1.4641 + * Anyway, the actual Romans were pretty tolerant of bad arithmetic, or
1.4642 + * expressions thereof, except when it came to taxes. Allow any number of M,
1.4643 + * an optional D, an optional CM or CD, any number of optional Cs, an optional
1.4644 + * XL or an optional XC, an optional IX or IV, an optional V and any number
1.4645 + * of optional Is.
1.4646 + */
1.4647 int isroman(char *t)
1.4648 {
1.4649 char *s;
1.4650 -
1.4651 - if (!t || !*t) return (0);
1.4652 -
1.4653 - s = t;
1.4654 -
1.4655 - while (*t == 'm' && *t ) t++;
1.4656 - if (*t == 'd') t++;
1.4657 - if (*t == 'c' && *(t+1) == 'm') t+=2;
1.4658 - if (*t == 'c' && *(t+1) == 'd') t+=2;
1.4659 - while (*t == 'c' && *t) t++;
1.4660 - if (*t == 'x' && *(t+1) == 'l') t+=2;
1.4661 - if (*t == 'x' && *(t+1) == 'c') t+=2;
1.4662 - if (*t == 'l') t++;
1.4663 - while (*t == 'x' && *t) t++;
1.4664 - if (*t == 'i' && *(t+1) == 'x') t+=2;
1.4665 - if (*t == 'i' && *(t+1) == 'v') t+=2;
1.4666 - if (*t == 'v') t++;
1.4667 - while (*t == 'i' && *t) t++;
1.4668 - if (!*t) return (1);
1.4669 -
1.4670 - return(0);
1.4671 + if (!t || !*t)
1.4672 + return 0;
1.4673 + s=t;
1.4674 + while (*t=='m' && *t)
1.4675 + t++;
1.4676 + if (*t=='d')
1.4677 + t++;
1.4678 + if (*t=='c' && t[1]=='m')
1.4679 + t+=2;
1.4680 + if (*t=='c' && t[1]=='d')
1.4681 + t+=2;
1.4682 + while (*t=='c' && *t)
1.4683 + t++;
1.4684 + if (*t=='x' && t[1]=='l')
1.4685 + t+=2;
1.4686 + if (*t=='x' && t[1]=='c')
1.4687 + t+=2;
1.4688 + if (*t=='l')
1.4689 + t++;
1.4690 + while (*t=='x' && *t)
1.4691 + t++;
1.4692 + if (*t=='i' && t[1]=='x')
1.4693 + t+=2;
1.4694 + if (*t=='i' && t[1]=='v')
1.4695 + t+=2;
1.4696 + if (*t=='v')
1.4697 + t++;
1.4698 + while (*t=='i' && *t)
1.4699 + t++;
1.4700 + return !*t;
1.4701 }
1.4702
1.4703 -
1.4704 -
1.4705 -
1.4706 -/* gcisalpha is a special version that is somewhat lenient on 8-bit texts. */
1.4707 -/* If we use the standard isalpha() function, 8-bit accented characters break */
1.4708 -/* words, so that tete with accented characters appears to be two words, "t" */
1.4709 -/* and "t", with 8-bit characters between them. This causes over-reporting of */
1.4710 -/* errors. gcisalpha() recognizes accented letters from the CP1252 (Windows) */
1.4711 -/* and ISO-8859-1 character sets, which are the most common PG 8-bit types. */
1.4712 -
1.4713 +/*
1.4714 + * gcisalpha:
1.4715 + *
1.4716 + * A version of isalpha() that is somewhat lenient on 8-bit texts.
1.4717 + * If we use the standard function, 8-bit accented characters break
1.4718 + * words, so that tete with accented characters appears to be two words, "t"
1.4719 + * and "t", with 8-bit characters between them. This causes over-reporting of
1.4720 + * errors. gcisalpha() recognizes accented letters from the CP1252 (Windows)
1.4721 + * and ISO-8859-1 character sets, which are the most common PG 8-bit types.
1.4722 + */
1.4723 int gcisalpha(unsigned char c)
1.4724 {
1.4725 - if (c >='a' && c <='z') return(1);
1.4726 - if (c >='A' && c <='Z') return(1);
1.4727 - if (c < 140) return(0);
1.4728 - if (c >=192 && c != 208 && c != 215 && c != 222 && c != 240 && c != 247 && c != 254) return(1);
1.4729 - if (c == 140 || c == 142 || c == 156 || c == 158 || c == 159) return (1);
1.4730 - return(0);
1.4731 + if (c>='a' && c<='z')
1.4732 + return 1;
1.4733 + if (c>='A' && c<='Z')
1.4734 + return 1;
1.4735 + if (c<140)
1.4736 + return 0;
1.4737 + if (c>=192 && c!=208 && c!=215 && c!=222 && c!=240 && c!=247 && c!=254)
1.4738 + return 1;
1.4739 + if (c==140 || c==142 || c==156 || c==158 || c==159)
1.4740 + return 1;
1.4741 + return 0;
1.4742 }
1.4743
1.4744 -/* gcisdigit is a special version that doesn't get confused in 8-bit texts. */
1.4745 +/*
1.4746 + * gcisdigit:
1.4747 + *
1.4748 + * A version of isdigit() that doesn't get confused in 8-bit texts.
1.4749 + */
1.4750 int gcisdigit(unsigned char c)
1.4751 {
1.4752 - if (c >= '0' && c <='9') return(1);
1.4753 - return(0);
1.4754 + return c>='0' && c<='9';
1.4755 }
1.4756
1.4757 -/* gcisletter is a special version that doesn't get confused in 8-bit texts. */
1.4758 -/* Yeah, we're ISO-8891-1-specific. So sue me. */
1.4759 +/*
1.4760 + * gcisletter:
1.4761 + *
1.4762 + * A version of isletter() that doesn't get confused in 8-bit texts.
1.4763 + * NB: this is ISO-8859-1-specific.
1.4764 + */
1.4765 int gcisletter(unsigned char c)
1.4766 {
1.4767 - if ((c >= 'A' && c <='Z') || (c >= 'a' && c <='z') || c >= 192) return(1);
1.4768 - return(0);
1.4769 + return c>='A' && c<='Z' || c>='a' && c<='z' || c>=192;
1.4770 }
1.4771
1.4772 -
1.4773 -
1.4774 -
1.4775 -/* gcstrchr wraps strchr to return NULL if the character being searched for is zero */
1.4776 -
1.4777 -char *gcstrchr(char *s, char c)
1.4778 +/*
1.4779 + * gcstrchr:
1.4780 + *
1.4781 + * Wraps strchr to return NULL if the character being searched for is zero.
1.4782 + */
1.4783 +char *gcstrchr(char *s,char c)
1.4784 {
1.4785 - if (c == 0) return(NULL);
1.4786 - return(strchr(s,c));
1.4787 + if (!c)
1.4788 + return NULL;
1.4789 + return strchr(s,c);
1.4790 }
1.4791
1.4792 -/* postprocess_for_DP is derived from postprocess_for_HTML */
1.4793 -/* It is invoked with the -d switch from flgets(). */
1.4794 -/* It simply "removes" from the line a hard-coded set of common */
1.4795 -/* DP-specific tags, so that the line passed to the main routine has*/
1.4796 -/* been pre-cleaned of DP markup. */
1.4797 -
1.4798 +/*
1.4799 + * postprocess_for_DP:
1.4800 + *
1.4801 + * Invoked with the -d switch from flgets().
1.4802 + * It simply "removes" from the line a hard-coded set of common
1.4803 + * DP-specific tags, so that the line passed to the main routine has
1.4804 + * been pre-cleaned of DP markup.
1.4805 + */
1.4806 void postprocess_for_DP(char *theline)
1.4807 {
1.4808 -
1.4809 - char *s, *t;
1.4810 + char *s,*t;
1.4811 int i;
1.4812 -
1.4813 if (!*theline)
1.4814 return;
1.4815 -
1.4816 - for (i = 0; *DPmarkup[i]; i++) {
1.4817 - s = strstr(theline, DPmarkup[i]);
1.4818 - while (s) {
1.4819 - t = s + strlen(DPmarkup[i]);
1.4820 - while (*t) {
1.4821 - *s = *t;
1.4822 - t++; s++;
1.4823 - }
1.4824 - *s = 0;
1.4825 - s = strstr(theline, DPmarkup[i]);
1.4826 - }
1.4827 - }
1.4828 -
1.4829 + for (i=0;*DPmarkup[i];i++)
1.4830 + {
1.4831 + s=strstr(theline,DPmarkup[i]);
1.4832 + while (s)
1.4833 + {
1.4834 + t=s+strlen(DPmarkup[i]);
1.4835 + while (*t)
1.4836 + {
1.4837 + *s=*t;
1.4838 + t++;
1.4839 + s++;
1.4840 + }
1.4841 + *s=0;
1.4842 + s=strstr(theline,DPmarkup[i]);
1.4843 + }
1.4844 + }
1.4845 }
1.4846
1.4847 -
1.4848 -/* postprocess_for_HTML is, at the moment (0.97), a very nasty */
1.4849 -/* short-term fix for Charlz. Nasty, nasty, nasty. */
1.4850 -/* It is invoked with the -m switch from flgets(). */
1.4851 -/* It simply "removes" from the line a hard-coded set of common */
1.4852 -/* HTML tags and "replaces" a hard-coded set of common HTML */
1.4853 -/* entities, so that the line passed to the main routine has */
1.4854 -/* been pre-cleaned of HTML. This is _so_ not the right way to */
1.4855 -/* deal with HTML, but what Charlz needs now is not HTML handling */
1.4856 -/* proper: just ignoring <i> tags and some others. */
1.4857 -/* To be revisited in future releases! */
1.4858 -
1.4859 +/*
1.4860 + * postprocess_for_HTML:
1.4861 + *
1.4862 + * Invoked with the -m switch from flgets().
1.4863 + * It simply "removes" from the line a hard-coded set of common
1.4864 + * HTML tags and "replaces" a hard-coded set of common HTML
1.4865 + * entities, so that the line passed to the main routine has
1.4866 + * been pre-cleaned of HTML.
1.4867 + */
1.4868 void postprocess_for_HTML(char *theline)
1.4869 {
1.4870 -
1.4871 - if (strstr(theline, "<") && strstr(theline, ">"))
1.4872 + if (strstr(theline,"<") && strstr(theline,">"))
1.4873 while (losemarkup(theline))
1.4874 ;
1.4875 while (loseentities(theline))
1.4876 @@ -2506,477 +2847,121 @@
1.4877
1.4878 char *losemarkup(char *theline)
1.4879 {
1.4880 - char *s, *t;
1.4881 + char *s,*t;
1.4882 int i;
1.4883 -
1.4884 if (!*theline)
1.4885 - return(NULL);
1.4886 -
1.4887 - s = strstr(theline, "<");
1.4888 - t = strstr(theline, ">");
1.4889 - if (!s || !t) return(NULL);
1.4890 - for (i = 0; *markup[i]; i++)
1.4891 - if (!tagcomp(s+1, markup[i])) {
1.4892 - if (!*(t+1)) {
1.4893 - *s = 0;
1.4894 - return(s);
1.4895 - }
1.4896 - else
1.4897 - if (t > s) {
1.4898 - strcpy(s, t+1);
1.4899 - return(s);
1.4900 - }
1.4901 + return NULL;
1.4902 + s=strstr(theline,"<");
1.4903 + t=strstr(theline,">");
1.4904 + if (!s || !t)
1.4905 + return NULL;
1.4906 + for (i=0;*markup[i];i++)
1.4907 + if (!tagcomp(s+1,markup[i]))
1.4908 + {
1.4909 + if (!t[1])
1.4910 + {
1.4911 + *s=0;
1.4912 + return s;
1.4913 + }
1.4914 + else if (t>s)
1.4915 + {
1.4916 + strcpy(s,t+1);
1.4917 + return s;
1.4918 + }
1.4919 }
1.4920 - /* it's an unrecognized <xxx> */
1.4921 - return(NULL);
1.4922 + /* It's an unrecognized <xxx>. */
1.4923 + return NULL;
1.4924 }
1.4925
1.4926 char *loseentities(char *theline)
1.4927 {
1.4928 int i;
1.4929 - char *s, *t;
1.4930 -
1.4931 + char *s,*t;
1.4932 if (!*theline)
1.4933 - return(NULL);
1.4934 -
1.4935 - for (i = 0; *entities[i].htmlent; i++) {
1.4936 - s = strstr(theline, entities[i].htmlent);
1.4937 - if (s) {
1.4938 - t = malloc((size_t)strlen(s));
1.4939 - if (!t) return(NULL);
1.4940 - strcpy(t, s + strlen(entities[i].htmlent));
1.4941 - strcpy(s, entities[i].textent);
1.4942 - strcat(s, t);
1.4943 + return NULL;
1.4944 + for (i=0;*entities[i].htmlent;i++)
1.4945 + {
1.4946 + s=strstr(theline,entities[i].htmlent);
1.4947 + if (s)
1.4948 + {
1.4949 + t=malloc((size_t)strlen(s));
1.4950 + if (!t)
1.4951 + return NULL;
1.4952 + strcpy(t,s+strlen(entities[i].htmlent));
1.4953 + strcpy(s,entities[i].textent);
1.4954 + strcat(s,t);
1.4955 free(t);
1.4956 - return(theline);
1.4957 - }
1.4958 - }
1.4959 -
1.4960 - /* V0.97 Duh. Forgot to check the htmlnum member */
1.4961 - for (i = 0; *entities[i].htmlnum; i++) {
1.4962 - s = strstr(theline, entities[i].htmlnum);
1.4963 - if (s) {
1.4964 - t = malloc((size_t)strlen(s));
1.4965 - if (!t) return(NULL);
1.4966 - strcpy(t, s + strlen(entities[i].htmlnum));
1.4967 - strcpy(s, entities[i].textent);
1.4968 - strcat(s, t);
1.4969 + return theline;
1.4970 + }
1.4971 + }
1.4972 + for (i=0;*entities[i].htmlnum;i++)
1.4973 + {
1.4974 + s=strstr(theline,entities[i].htmlnum);
1.4975 + if (s)
1.4976 + {
1.4977 + t=malloc((size_t)strlen(s));
1.4978 + if (!t)
1.4979 + return NULL;
1.4980 + strcpy(t,s+strlen(entities[i].htmlnum));
1.4981 + strcpy(s,entities[i].textent);
1.4982 + strcat(s,t);
1.4983 free(t);
1.4984 - return(theline);
1.4985 - }
1.4986 - }
1.4987 - return(NULL);
1.4988 + return theline;
1.4989 + }
1.4990 + }
1.4991 + return NULL;
1.4992 }
1.4993
1.4994 -
1.4995 -int tagcomp(char *strin, char *basetag)
1.4996 +int tagcomp(char *strin,char *basetag)
1.4997 {
1.4998 - char *s, *t;
1.4999 -
1.5000 - s = basetag;
1.5001 - t = strin;
1.5002 - if (*t == '/') t++; /* ignore a slash */
1.5003 - while (*s && *t) {
1.5004 - if (tolower(*s) != tolower(*t)) return(1);
1.5005 - s++; t++;
1.5006 - }
1.5007 - /* OK, we have < followed by a valid tag start */
1.5008 - /* should I do something about length? */
1.5009 - /* this is messy. The length of an <i> tag is */
1.5010 - /* limited, but a <table> could go on for miles */
1.5011 - /* so I'd have to parse the tags . . . ugh. */
1.5012 - /* It isn't what Charlz needs now, so mark it */
1.5013 - /* as 'pending'. */
1.5014 - return(0);
1.5015 + char *s,*t;
1.5016 + s=basetag;
1.5017 + t=strin;
1.5018 + if (*t=='/')
1.5019 + t++; /* ignore a slash */
1.5020 + while (*s && *t)
1.5021 + {
1.5022 + if (tolower(*s)!=tolower(*t))
1.5023 + return 1;
1.5024 + s++;
1.5025 + t++;
1.5026 + }
1.5027 + return 0;
1.5028 }
1.5029
1.5030 -void proghelp() /* explain program usage here */
1.5031 +void proghelp()
1.5032 {
1.5033 - fputs("V. 0.991. Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>.\n",stderr);
1.5034 - fputs("Gutcheck comes wih ABSOLUTELY NO WARRANTY. For details, read the file COPYING.\n", stderr);
1.5035 - fputs("This is Free Software; you may redistribute it under certain conditions (GPL);\n", stderr);
1.5036 - fputs("read the file COPYING for details.\n\n", stderr);
1.5037 - fputs("Usage is: gutcheck [-setpxloyhud] filename\n",stderr);
1.5038 - fputs(" where -s checks single quotes, -e suppresses echoing lines, -t checks typos\n",stderr);
1.5039 - fputs(" -x (paranoid) switches OFF -t and extra checks, -l turns OFF line-end checks\n",stderr);
1.5040 - fputs(" -o just displays overview without detail, -h echoes header fields\n",stderr);
1.5041 - fputs(" -v (verbose) unsuppresses duplicate reporting, -m suppresses markup\n",stderr);
1.5042 + fputs("Bookloupe version " PACKAGE_VERSION ".\n",stderr);
1.5043 + fputs("Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>.\n",stderr);
1.5044 + fputs("Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk>.\n",stderr);
1.5045 + fputs("Bookloupe comes wih ABSOLUTELY NO WARRANTY. "
1.5046 + "For details, read the file COPYING.\n",stderr);
1.5047 + fputs("This is Free Software; "
1.5048 + "you may redistribute it under certain conditions (GPL);\n",stderr);
1.5049 + fputs("read the file COPYING for details.\n\n",stderr);
1.5050 + fputs("Usage is: bookloupe [-setpxloyhud] filename\n",stderr);
1.5051 + fputs(" where -s checks single quotes, -e suppresses echoing lines, "
1.5052 + "-t checks typos\n",stderr);
1.5053 + fputs(" -x (paranoid) switches OFF -t and extra checks, "
1.5054 + "-l turns OFF line-end checks\n",stderr);
1.5055 + fputs(" -o just displays overview without detail, "
1.5056 + "-h echoes header fields\n",stderr);
1.5057 + fputs(" -v (verbose) unsuppresses duplicate reporting, "
1.5058 + "-m suppresses markup\n",stderr);
1.5059 fputs(" -d ignores DP-specific markup,\n",stderr);
1.5060 - fputs(" -u uses a file gutcheck.typ to query user-defined possible typos\n",stderr);
1.5061 - fputs("Sample usage: gutcheck warpeace.txt \n",stderr);
1.5062 + fputs(" -u uses a file gutcheck.typ to query user-defined "
1.5063 + "possible typos\n",stderr);
1.5064 + fputs("Sample usage: bookloupe warpeace.txt \n",stderr);
1.5065 fputs("\n",stderr);
1.5066 - fputs("Gutcheck looks for errors in Project Gutenberg(TM) etexts.\n", stderr);
1.5067 - fputs("Gutcheck queries anything it thinks shouldn't be in a PG text; non-ASCII\n",stderr);
1.5068 - fputs("characters like accented letters, lines longer than 75 or shorter than 55,\n",stderr);
1.5069 - fputs("unbalanced quotes or brackets, a variety of badly formatted punctuation, \n",stderr);
1.5070 - fputs("HTML tags, some likely typos. It is NOT a substitute for human judgement.\n",stderr);
1.5071 + fputs("Bookloupe looks for errors in Project Gutenberg(TM) etexts.\n",
1.5072 + stderr);
1.5073 + fputs("Bookloupe queries anything it thinks shouldn't be in a PG text; "
1.5074 + "non-ASCII\n",stderr);
1.5075 + fputs("characters like accented letters, "
1.5076 + "lines longer than 75 or shorter than 55,\n",stderr);
1.5077 + fputs("unbalanced quotes or brackets, "
1.5078 + "a variety of badly formatted punctuation, \n",stderr);
1.5079 + fputs("HTML tags, some likely typos. "
1.5080 + "It is NOT a substitute for human judgement.\n",stderr);
1.5081 fputs("\n",stderr);
1.5082 }
1.5083 -
1.5084 -
1.5085 -
1.5086 -/*********************************************************************
1.5087 - Revision History:
1.5088 -
1.5089 - 04/22/01 Cleaned up some stuff and released .10
1.5090 -
1.5091 - ---------------
1.5092 -
1.5093 - 05/09/01 Added the typo list, added two extra cases of he/be error,
1.5094 - added -p switch, OPEN_SINGLE QUOTE char as .11
1.5095 -
1.5096 - ---------------
1.5097 -
1.5098 - 05/20/01 Increased the typo list,
1.5099 - added paranoid mode,
1.5100 - ANSIfied the code and added some casts
1.5101 - so the compiler wouldn't keep asking if I knew what I was doing,
1.5102 - fixed bug in l.s.d. condition (thanks, Dave!),
1.5103 - standardized spacing when echoing,
1.5104 - added letter-combo checking code to typo section,
1.5105 - added more h/b words to typo array.
1.5106 - Not too sure about putting letter combos outside of the TYPO conditions -
1.5107 - someone is sure to have a book about the tbaka tribe, or something. Anyway, let's see.
1.5108 - Released as .12
1.5109 -
1.5110 - ---------------
1.5111 -
1.5112 - 06/01/01 Removed duplicate reporting of Tildes, asterisks, etc.
1.5113 - 06/10/01 Added flgets routine to help with platform-independent
1.5114 - detection of invalid line-ends. All PG text files should
1.5115 - have CR/LF (13/10) at end of line, regardless of system.
1.5116 - Gutcheck now validates this by default. (Thanks, Charles!)
1.5117 - Released as .13
1.5118 -
1.5119 - ---------------
1.5120 -
1.5121 - 06/11/01 Added parenthesis match checking. (c_brack, cbrack_err etc.)
1.5122 - Released as .14
1.5123 -
1.5124 - ---------------
1.5125 -
1.5126 - 06/23/01 Fixed: 'No',he said. not being flagged.
1.5127 -
1.5128 - Improved: better single-quotes checking:
1.5129 -
1.5130 - Ignore singlequotes surrounded by alpha, like didn't. (was OK)
1.5131 -
1.5132 - If a singlequote is at the END of a word AND the word ends in "s":
1.5133 - The dogs' tails wagged.
1.5134 - it's probably an apostrophe, but less commonly may be a closequote:
1.5135 - "These 'pack dogs' of yours look more like wolves."
1.5136 -
1.5137 - If it's got punctuation before it and is followed by a space
1.5138 - or punctuation:
1.5139 - . . . was a problem,' he said
1.5140 - . . . was a problem,'"
1.5141 - it is probably (certainly?) a closequote.
1.5142 -
1.5143 - If it's at start of paragraph, it's probably an openquote.
1.5144 - (but watch dialect)
1.5145 -
1.5146 - Words with ' at beginning and end are probably quoted:
1.5147 - "You have the word 'chivalry' frequently on your lips."
1.5148 - (Not specifically implemented)
1.5149 - V.18 I'm glad I didn't implement this, 'cos it jest ain't so
1.5150 - where the convention is to punctuate outside the quotes.
1.5151 - 'Come', he said, 'and join the party'.
1.5152 -
1.5153 - If it is followed by an alpha, and especially a capital:
1.5154 - 'Hello,' called he.
1.5155 - it is either an openquote or dialect.
1.5156 -
1.5157 - Dialect breaks ALL the rules:
1.5158 - A man's a man for a' that.
1.5159 - "Aye, but 'tis all in the pas' now."
1.5160 - "'Tis often the way," he said.
1.5161 - 'Ave a drink on me.
1.5162 -
1.5163 - This version looks to be an improvement, and produces
1.5164 - fewer false positives, but is still not perfect. The
1.5165 - 'pack dogs' case still fools it, and dialect is still
1.5166 - a problem. Oh, well, it's an improvement, and I have
1.5167 - a weighted structure in place for refining guesses at
1.5168 - closequotes. Maybe next time, I'll add a bit of logic
1.5169 - where if there is an open quote and one that was guessed
1.5170 - to be a possessive apostrophe after s, I'll re-guess it
1.5171 - to be a closequote. Let's see how this one flies, first.
1.5172 -
1.5173 - (Afterview: it's still crap. Needs much work, and a deeper insight.)
1.5174 -
1.5175 - Released as .15
1.5176 -
1.5177 - TODO: More he/be checks. Can't be perfect - counterexamples:
1.5178 - I gave my son good advice: be married regardless of the world's opinion.
1.5179 - I gave my son good advice: he married regardless of the world's opinion.
1.5180 -
1.5181 - If by "primitive" be meant "crude", we can understand the sentence.
1.5182 - If by "primitive" he meant "crude", we can understand the sentence.
1.5183 -
1.5184 - No matter what be said, I must go on.
1.5185 - No matter what he said, I must go on.
1.5186 -
1.5187 - No value, however great, can be set upon them.
1.5188 - No value, however great, can he set upon them.
1.5189 -
1.5190 - Real-Life one from a DP International Weekly Miscellany:
1.5191 - He wandered through the forest without fear, sleeping
1.5192 - much, for in sleep be had companionship--the Great
1.5193 - Spirit teaching him what he should know in dreams.
1.5194 - That one found by jeebies, and it turned out to be "he".
1.5195 -
1.5196 -
1.5197 - ---------------
1.5198 -
1.5199 - 07/01/01 Added -O option.
1.5200 - Improved singlequotes by reporting mismatched single quotes
1.5201 - only if an open_single_quotes was found.
1.5202 -
1.5203 - Released as .16
1.5204 -
1.5205 - ---------------
1.5206 -
1.5207 - 08/27/01 Added -Y switch for Robert Rowe to allow his app to
1.5208 - catch the error output.
1.5209 -
1.5210 - Released as .17
1.5211 -
1.5212 - ---------------
1.5213 -
1.5214 - 09/08/01 Added checking Capitals at start of paragraph, but not
1.5215 - checking them at start of sentence.
1.5216 -
1.5217 - TODO: Parse sentences out so can check reliably for start of
1.5218 - sentence. Need a whole different approach for that.
1.5219 - (Can't just rely on periods, since they are also
1.5220 - used for abbreviations, etc.)
1.5221 -
1.5222 - Added checking for all vowels or all consonants in a word.
1.5223 -
1.5224 - While I was in, I added "ii" checking and "tl" at start of word.
1.5225 -
1.5226 - Added echoing of first line of paragraph when reporting
1.5227 - mismatched quoted or brackets (thanks to David Widger for the
1.5228 - suggestion)
1.5229 -
1.5230 - Not querying L at start of a number (used for British pounds).
1.5231 -
1.5232 - The spelling changes are sort of half-done but released anyway
1.5233 - Skipped .18 because I had given out a couple of test versions
1.5234 - with that number.
1.5235 -
1.5236 - 09/25/01 Released as .19
1.5237 -
1.5238 - ---------------
1.5239 -
1.5240 - TODO:
1.5241 - Use the logic from my new version of safewrap to stop querying
1.5242 - short lines like poems and TOCs.
1.5243 - Ignore non-standard ellipses like . . . or ...
1.5244 -
1.5245 -
1.5246 - ---------------
1.5247 - 10/01/01 Made any line over 80 a VERY long line (was 85).
1.5248 - Recognized openquotes on indented paragraphs as continuations
1.5249 - of the same speech.
1.5250 - Added "cf" to the okword list (how did I forget _that_?) and a few others.
1.5251 - Moved abbrev to okword and made it more general.
1.5252 - Removed requirement that PG_space_emdash be greater than
1.5253 - ten before turning off warnings about spaced dashes.
1.5254 - Added period to list of characters that might constitute a separator line.
1.5255 - Now checking for double punctuation (Thanks, David!)
1.5256 - Now if two spaced em-dashes on a line, reports both. (DW)
1.5257 - Bug: Wasn't catching spaced punctuation at line-end since I
1.5258 - added flgets in version .13 - fixed.
1.5259 - Bug: Wasn't catching spaced singlequotes - fixed
1.5260 - Now reads punctuated numbers like 1,000 as a single word.
1.5261 - (Used to give "standalone 1" type queries)
1.5262 - Changed paranoid mode - not including s and p options. -ex is now quite usable.
1.5263 - Bug: was calling `"For it is perfectly impossible," Unspaced Quotes - fixed
1.5264 - Bug: Sometimes gave _next_ line number for queried word at end of line - fixed
1.5265 -
1.5266 - 10/22/01 Released as .20
1.5267 -
1.5268 - ---------------
1.5269 -
1.5270 - Added count of lines with spaces at end. (cnt_spacend) (Thanks, Brett!)
1.5271 - Reduced the number of hi-bit letters needed to stop reporting them
1.5272 - from 1/20 to 1/100 or 200 in total.
1.5273 - Added PG footer check.
1.5274 - Added the -h switch.
1.5275 - Fixed platform-specific CHAR_EOL checking for isemptyline - changed to 13 and 10
1.5276 - Not reporting ".," when there are many of them, such as a book with many references to "Vol 1., p. 23"
1.5277 - Added unspaced brackets check when surrounded by alpha.
1.5278 - Removed all typo reporting unless the typo switch is on.
1.5279 - Added gcisalpha to ease over-reporting of 8-bit queries.
1.5280 - ECHO_SWITCH is now ON by default!
1.5281 - PARANOID_SWITCH is now ON by default!
1.5282 - Checking for ">From" placed there by e-mail MTA (Thanks Andrew & Greg)
1.5283 - Checking for standalone lowercase "l"
1.5284 - Checking for standalone lowercase "s"
1.5285 - Considering "is be" and "be is" "be was" "was be" as he/be errors
1.5286 - Looking at punct at end of para
1.5287 -
1.5288 - 01/20/02 Released as .21
1.5289 -
1.5290 - ---------------
1.5291 -
1.5292 - Added VERBOSE_SWITCH to make it list everything. (George Davis)
1.5293 -
1.5294 - ---------------
1.5295 -
1.5296 - 02/17/02 Added cint in flgets to try fix an EOF failure on a compiler I don't have.
1.5297 - after which
1.5298 - This line caused a coredump on Solaris - fixed.
1.5299 - Da sagte die Figur: " Das ist alles gar schoen, und man mag die Puppe
1.5300 - 03/09/02 Changed header recognition for another header change
1.5301 - Called it .24
1.5302 - 03/29/02 Added qword[][] so I can suppress massive overreporting
1.5303 - of queried "words" like "FN", "Wm.", "th'", people's
1.5304 - initials, chemical formulae and suchlike in some texts.
1.5305 - Called it .25
1.5306 - 04/07/02 The qword summary reports at end shouldn't show in OVERVIEW mode. Fixed.
1.5307 - Added linecounts in overview mode.
1.5308 - Wow! gutcheck gutcheck.exe doesn't report a binary! :-) Need to tighten up. Done.
1.5309 - "m" is a not uncommon scanno for "in", but also appears in "a.m." - Can I get round that?
1.5310 - 07/07/02 Added GPL.
1.5311 - Added checking for broken em-dash at line-end (enddash)
1.5312 - Released as 0.95
1.5313 - 08/17/02 Fixed a bug that treated some hi-bit characters as spaces. Thanks, Carlo.
1.5314 - Released as 0.96
1.5315 - 10/10/02 Suppressing some annoying multiple reports by default:
1.5316 - Standalone Ones, Asterisks, Square Brackets.
1.5317 - Digit 1 occurs often in many scientific texts.
1.5318 - Asterisk occurs often in multi-footnoted texts.
1.5319 - Mismatch Square Brackets occurs often in multi-para footnotes.
1.5320 - Added -m switch for Charlz. Horrible. Nasty. Kludgy. Evil.
1.5321 - . . . but it does more or less work for the main cases.
1.5322 - Removed uppercase within a word as a separate category so
1.5323 - that names like VanAllen get reported only once, like other
1.5324 - suspected typos.
1.5325 - 11/24/02 Fixed - -m switch wasn't looking at htmlnum in
1.5326 - loseentities (Thanks, Brett!)
1.5327 - Fixed bug which occasionally gave false warning of
1.5328 - paragraph starting with lowercase.
1.5329 - Added underscore as character not to query around doublequotes.
1.5330 - Split the "Non-ASCII" message into "Non-ASCII" vs. "Non-ISO-8859"
1.5331 - . . . this is to help detect things like CP1252 characters.
1.5332 - Released as 0.97
1.5333 -
1.5334 - 12/01/02 Hacked a simplified version of the "Wrongspaced quotes" out of gutspell,
1.5335 - for doublequotes only. Replaces "Spaced quote", since it also covers that
1.5336 - case.
1.5337 - Added "warn_hyphen" to ease over-reporting of hyphens.
1.5338 -
1.5339 - 12/20/02 Added "extra period" checks.
1.5340 - Added single character line check
1.5341 - Added I" check - is usually an exclam
1.5342 - Released as 0.98
1.5343 -
1.5344 - 1/5/03 Eeek! Left in a lowerit(argv[0]) at the start before procfile()
1.5345 - from when I was looking at ways to identify markup. Refuses to
1.5346 - open files for *nix users with upcase in the filemanes. Removed.
1.5347 - Fixed quickly and released as 0.981
1.5348 -
1.5349 - 1/8/03 Added "arid" to the list of typos, slightly against my better
1.5350 - judgement, but the DP gang are all excited about it. :-)
1.5351 - Added a check for comma followed by capital letter, where
1.5352 - a period has OCRed into a comma. (DW). Not sure about this
1.5353 - either; we'll see.
1.5354 - Compiling for Win32 to allow longfilenames.
1.5355 -
1.5356 - 6/1/04 A messy test release for DW to include the "gutcheck.typ"
1.5357 - process. And the gutcheck.jee trials. Removed "arid" --
1.5358 - it can go in gutcheck.typ
1.5359 -
1.5360 - Added checks for carats ^ and slants / but disabling slant
1.5361 - queries if more than 20 of them, because some people use them
1.5362 - for /italics/. Slants are commonly mistaken italic "I"s.
1.5363 -
1.5364 - Later: removed gutcheck.jee -- wrote jeebies instead.
1.5365 -
1.5366 -Random TODO:
1.5367 - Check brackets more closely, like quotes, so that it becomes
1.5368 - easy to find the error in long paragraphs full of brackets.
1.5369 -
1.5370 -
1.5371 - 11/4/04 Assorted cleanup. Fixed case where text started with an
1.5372 - unbalanced paragraph.
1.5373 -
1.5374 - 1/2/05 Has it really been that long? Added "nocomma", "noperiod" check.
1.5375 - Bits and pieces: improved isroman(). Added isletter().
1.5376 - Other stuff I never noted before this.
1.5377 -
1.5378 - 7/3/05 Stuck in a quick start on DP-markup ignoring
1.5379 - at BillFlis's suggestion.
1.5380 -
1.5381 - 1/23/06 Took out nocomma etc if typos are off. Why did I ever leave that in?
1.5382 - Don't count footer for dotcomma etc.
1.5383 -
1.5384 -
1.5385 -1 I
1.5386 -ail all
1.5387 -arc are
1.5388 -arid and
1.5389 -bad had
1.5390 -ball hall
1.5391 -band hand
1.5392 -bar her
1.5393 -bat but
1.5394 -be he
1.5395 -bead head
1.5396 -beads heads
1.5397 -bear hear
1.5398 -bit hit
1.5399 -bo be
1.5400 -boon been
1.5401 -borne home
1.5402 -bow how
1.5403 -bumbled humbled
1.5404 -car ear
1.5405 -carnage carriage
1.5406 -carne came
1.5407 -cast east
1.5408 -cat cut
1.5409 -cat eat
1.5410 -cheek check
1.5411 -clay day
1.5412 -coining coming
1.5413 -comer corner
1.5414 -die she
1.5415 -docs does
1.5416 -ease case
1.5417 -fail fall
1.5418 -fee he
1.5419 -haying having
1.5420 -ho he
1.5421 -ho who
1.5422 -hut but
1.5423 -is as
1.5424 -lie he
1.5425 -lime time
1.5426 -loth 10th
1.5427 -m in
1.5428 -modem modern
1.5429 -Ms his
1.5430 -ray away
1.5431 -ray my
1.5432 -ringer finger
1.5433 -ringers fingers
1.5434 -rioted noted
1.5435 -tho the
1.5436 -tie he
1.5437 -tie the
1.5438 -tier her
1.5439 -tight right
1.5440 -tile the
1.5441 -tiling thing
1.5442 -tip up
1.5443 -tram train
1.5444 -tune time
1.5445 -u "
1.5446 -wen well
1.5447 -yon you
1.5448 -
1.5449 -*********************************************************************/
1.5450 -