1.1 --- a/bookloupe/bookloupe.c Sun May 26 22:43:45 2013 +0100
1.2 +++ b/bookloupe/bookloupe.c Mon May 27 09:03:04 2013 +0100
1.3 @@ -1,21 +1,21 @@
1.4 /*************************************************************************/
1.5 /* bookloupe--check for assorted weirdnesses in a PG candidate text file */
1.6 -/* */
1.7 -/* Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com> */
1.8 -/* Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk> */
1.9 -/* */
1.10 +/* */
1.11 +/* Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com> */
1.12 +/* Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk> */
1.13 +/* */
1.14 /* This program is free software; you can redistribute it and/or modify */
1.15 /* it under the terms of the GNU General Public License as published by */
1.16 /* the Free Software Foundation; either version 2 of the License, or */
1.17 -/* (at your option) any later version. */
1.18 -/* */
1.19 +/* (at your option) any later version. */
1.20 +/* */
1.21 /* This program is distributed in the hope that it will be useful, */
1.22 -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
1.23 -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
1.24 -/* GNU General Public License for more details. */
1.25 -/* */
1.26 -/* You should have received a copy of the GNU General Public License */
1.27 -/* along with this program. If not, see <http://www.gnu.org/licenses/>. */
1.28 +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
1.29 +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
1.30 +/* GNU General Public License for more details. */
1.31 +/* */
1.32 +/* You should have received a copy of the GNU General Public License */
1.33 +/* along with this program. If not, see <http://www.gnu.org/licenses/>. */
1.34 /*************************************************************************/
1.35
1.36 #include <stdio.h>
1.37 @@ -23,8 +23,8 @@
1.38 #include <string.h>
1.39 #include <ctype.h>
1.40
1.41 -#define MAXWORDLEN 80 /* max length of one word */
1.42 -#define LINEBUFSIZE 2048 /* buffer size for an input line */
1.43 +#define MAXWORDLEN 80 /* max length of one word */
1.44 +#define LINEBUFSIZE 2048 /* buffer size for an input line */
1.45
1.46 #define MAX_USER_TYPOS 1000
1.47 #define USERTYPO_FILE "gutcheck.typ"
1.48 @@ -257,17 +257,17 @@
1.49 };
1.50
1.51 /* special characters */
1.52 -#define CHAR_SPACE 32
1.53 -#define CHAR_TAB 9
1.54 -#define CHAR_LF 10
1.55 -#define CHAR_CR 13
1.56 -#define CHAR_DQUOTE 34
1.57 -#define CHAR_SQUOTE 39
1.58 +#define CHAR_SPACE 32
1.59 +#define CHAR_TAB 9
1.60 +#define CHAR_LF 10
1.61 +#define CHAR_CR 13
1.62 +#define CHAR_DQUOTE 34
1.63 +#define CHAR_SQUOTE 39
1.64 #define CHAR_OPEN_SQUOTE 96
1.65 -#define CHAR_TILDE 126
1.66 -#define CHAR_ASTERISK 42
1.67 -#define CHAR_FORESLASH 47
1.68 -#define CHAR_CARAT 94
1.69 +#define CHAR_TILDE 126
1.70 +#define CHAR_ASTERISK 42
1.71 +#define CHAR_FORESLASH 47
1.72 +#define CHAR_CARAT 94
1.73
1.74 #define CHAR_UNDERSCORE '_'
1.75 #define CHAR_OPEN_CBRACK '{'
1.76 @@ -282,31 +282,31 @@
1.77 #define WAY_TOO_LONG 80
1.78 #define SHORTEST_PG_LINE 55
1.79
1.80 -#define SWITCHES "ESTPXLOYHWVMUD" /* switches:- */
1.81 - /* D - ignore DP-specific markup */
1.82 - /* E - echo queried line */
1.83 - /* S - check single quotes */
1.84 - /* T - check common typos */
1.85 - /* P - require closure of quotes on */
1.86 - /* every paragraph */
1.87 - /* X - "Trust no one" :-) Paranoid! */
1.88 - /* Queries everything */
1.89 - /* L - line end checking defaults on */
1.90 - /* -L turns it off */
1.91 - /* O - overview. Just shows counts. */
1.92 - /* Y - puts errors to stdout */
1.93 - /* instead of stderr */
1.94 - /* H - Echoes header fields */
1.95 - /* M - Ignore markup in < > */
1.96 - /* U - Use file of User-defined Typos*/
1.97 - /* W - Defaults for use on Web upload*/
1.98 - /* V - Verbose - list EVERYTHING! */
1.99 -#define SWITNO 14 /* max number of switch parms */
1.100 - /* - used for defining array-size */
1.101 -#define MINARGS 1 /* minimum no of args excl switches */
1.102 -#define MAXARGS 1 /* maximum no of args excl switches */
1.103 +#define SWITCHES "ESTPXLOYHWVMUD" /* switches:- */
1.104 + /* D - ignore DP-specific markup */
1.105 + /* E - echo queried line */
1.106 + /* S - check single quotes */
1.107 + /* T - check common typos */
1.108 + /* P - require closure of quotes on */
1.109 + /* every paragraph */
1.110 + /* X - "Trust no one" :-) Paranoid! */
1.111 + /* Queries everything */
1.112 + /* L - line end checking defaults on */
1.113 + /* -L turns it off */
1.114 + /* O - overview. Just shows counts. */
1.115 + /* Y - puts errors to stdout */
1.116 + /* instead of stderr */
1.117 + /* H - Echoes header fields */
1.118 + /* M - Ignore markup in < > */
1.119 + /* U - Use file of User-defined Typos */
1.120 + /* W - Defaults for use on Web upload */
1.121 + /* V - Verbose - list EVERYTHING! */
1.122 +#define SWITNO 14 /* max number of switch parms */
1.123 + /* - used for defining array-size */
1.124 +#define MINARGS 1 /* minimum no of args excl switches */
1.125 +#define MAXARGS 1 /* maximum no of args excl switches */
1.126
1.127 -int pswit[SWITNO]; /* program switches set by SWITCHES */
1.128 +int pswit[SWITNO]; /* program switches set by SWITCHES */
1.129
1.130 #define ECHO_SWITCH 0
1.131 #define SQUOTE_SWITCH 1
1.132 @@ -321,23 +321,24 @@
1.133 #define VERBOSE_SWITCH 10
1.134 #define MARKUP_SWITCH 11
1.135 #define USERTYPO_SWITCH 12
1.136 -#define DP_SWITCH 13
1.137 +#define DP_SWITCH 13
1.138
1.139 -long cnt_dquot; /* for overview mode, count of doublequote queries */
1.140 -long cnt_squot; /* for overview mode, count of singlequote queries */
1.141 -long cnt_brack; /* for overview mode, count of brackets queries */
1.142 -long cnt_bin; /* for overview mode, count of non-ASCII queries */
1.143 -long cnt_odd; /* for overview mode, count of odd character queries */
1.144 -long cnt_long; /* for overview mode, count of long line errors */
1.145 -long cnt_short; /* for overview mode, count of short line queries */
1.146 -long cnt_punct; /* for overview mode, count of punctuation and spacing queries */
1.147 -long cnt_dash; /* for overview mode, count of dash-related queries */
1.148 -long cnt_word; /* for overview mode, count of word queries */
1.149 -long cnt_html; /* for overview mode, count of html queries */
1.150 -long cnt_lineend; /* for overview mode, count of line-end queries */
1.151 -long cnt_spacend; /* count of lines with space at end */
1.152 -long linecnt; /* count of total lines in the file */
1.153 -long checked_linecnt; /* count of lines actually checked */
1.154 +long cnt_dquot; /* for overview mode, count of doublequote queries */
1.155 +long cnt_squot; /* for overview mode, count of singlequote queries */
1.156 +long cnt_brack; /* for overview mode, count of brackets queries */
1.157 +long cnt_bin; /* for overview mode, count of non-ASCII queries */
1.158 +long cnt_odd; /* for overview mode, count of odd character queries */
1.159 +long cnt_long; /* for overview mode, count of long line errors */
1.160 +long cnt_short; /* for overview mode, count of short line queries */
1.161 +long cnt_punct; /* for overview mode,
1.162 + count of punctuation and spacing queries */
1.163 +long cnt_dash; /* for overview mode, count of dash-related queries */
1.164 +long cnt_word; /* for overview mode, count of word queries */
1.165 +long cnt_html; /* for overview mode, count of html queries */
1.166 +long cnt_lineend; /* for overview mode, count of line-end queries */
1.167 +long cnt_spacend; /* count of lines with space at end */
1.168 +long linecnt; /* count of total lines in the file */
1.169 +long checked_linecnt; /* count of lines actually checked */
1.170
1.171 void proghelp(void);
1.172 void procfile(char *);
1.173 @@ -379,7 +380,40 @@
1.174 #define MAX_QWORD 50
1.175 #define MAX_QWORD_LENGTH 40
1.176 char qword[MAX_QWORD][MAX_QWORD_LENGTH];
1.177 -signed int dupcnt[MAX_QWORD];
1.178 +int dupcnt[MAX_QWORD];
1.179 +
1.180 +struct first_pass_results {
1.181 + long firstline,astline;
1.182 + long footerline,totlen,binlen,alphalen,endquote_count,shortline,dotcomma;
1.183 + long fslashline,hyphens,longline,verylongline,htmcount,standalone_digit;
1.184 + long spacedash,emdash,space_emdash,non_PG_space_emdash,PG_space_emdash;
1.185 + int Dutchcount,Frenchcount;
1.186 +};
1.187 +
1.188 +struct warnings {
1.189 + int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen;
1.190 + int endquote,isDutch,isFrench;
1.191 +};
1.192 +
1.193 +struct counters {
1.194 + long quot;
1.195 + int c_unders,c_brack,s_brack,r_brack;
1.196 + int open_single_quote,close_single_quote;
1.197 +};
1.198 +
1.199 +struct line_properties {
1.200 + unsigned int len,blen;
1.201 + char start;
1.202 +};
1.203 +
1.204 +struct parities {
1.205 + int dquote,squote;
1.206 +};
1.207 +
1.208 +struct pending {
1.209 + char dquote[80],squote[80],rbrack[80],sbrack[80],cbrack[80],unders[80];
1.210 + long squot;
1.211 +};
1.212
1.213 int main(int argc,char **argv)
1.214 {
1.215 @@ -389,14 +423,14 @@
1.216 FILE *usertypofile;
1.217 if (strlen(argv[0])<sizeof(running_from))
1.218 /* save the path to the executable */
1.219 - strcpy(running_from,argv[0]);
1.220 + strcpy(running_from,argv[0]);
1.221 /* find out what directory we're running from */
1.222 s=running_from+strlen(running_from);
1.223 for (;*s!='/' && *s!='\\' && s>=running_from;s--)
1.224 - *s=0;
1.225 + *s=0;
1.226 switno=strlen(SWITCHES);
1.227 for (i=switno;--i>0;)
1.228 - pswit[i]=0; /* initialise switches */
1.229 + pswit[i]=0; /* initialise switches */
1.230 /*
1.231 * Standard loop to extract switches.
1.232 * When we come out of this loop, the arguments will be
1.233 @@ -404,25 +438,25 @@
1.234 * represented by their equivalent elements in pswit[]
1.235 */
1.236 while (--argc>0 && **++argv=='-')
1.237 - for (argsw=argv[0]+1;*argsw!='\0';argsw++)
1.238 - for (i=switno,invarg=1;(--i>=0) && invarg==1;)
1.239 - if ((toupper(*argsw))==SWITCHES[i])
1.240 + for (argsw=argv[0]+1;*argsw!='\0';argsw++)
1.241 + for (i=switno,invarg=1;(--i>=0) && invarg==1;)
1.242 + if ((toupper(*argsw))==SWITCHES[i])
1.243 {
1.244 - invarg=0;
1.245 - pswit[i]=1;
1.246 + invarg=0;
1.247 + pswit[i]=1;
1.248 }
1.249 /* Paranoid checking is turned OFF, not on, by its switch */
1.250 pswit[PARANOID_SWITCH]^=1;
1.251 if (pswit[PARANOID_SWITCH])
1.252 /* if running in paranoid mode force typo checks as well */
1.253 - pswit[TYPO_SWITCH]=pswit[TYPO_SWITCH]^1;
1.254 + pswit[TYPO_SWITCH]=pswit[TYPO_SWITCH]^1;
1.255 /* Line-end checking is turned OFF, not on, by its switch */
1.256 pswit[LINE_END_SWITCH]^=1;
1.257 /* Echoing is turned OFF, not on, by its switch */
1.258 pswit[ECHO_SWITCH]^=1;
1.259 if (pswit[OVERVIEW_SWITCH])
1.260 /* just print summary; don't echo */
1.261 - pswit[ECHO_SWITCH]=0;
1.262 + pswit[ECHO_SWITCH]=0;
1.263 /*
1.264 * Web uploads - for the moment, this is really just a placeholder
1.265 * until we decide what processing we really want to do on web uploads
1.266 @@ -430,78 +464,78 @@
1.267 if (pswit[WEB_SWITCH])
1.268 {
1.269 /* specific override for web uploads */
1.270 - pswit[ECHO_SWITCH]=1;
1.271 - pswit[SQUOTE_SWITCH]=0;
1.272 - pswit[TYPO_SWITCH]=1;
1.273 - pswit[QPARA_SWITCH]=0;
1.274 - pswit[PARANOID_SWITCH]=1;
1.275 - pswit[LINE_END_SWITCH]=0;
1.276 - pswit[OVERVIEW_SWITCH]=0;
1.277 - pswit[STDOUT_SWITCH]=0;
1.278 - pswit[HEADER_SWITCH]=1;
1.279 - pswit[VERBOSE_SWITCH]=0;
1.280 - pswit[MARKUP_SWITCH]=0;
1.281 - pswit[USERTYPO_SWITCH]=0;
1.282 - pswit[DP_SWITCH]=0;
1.283 + pswit[ECHO_SWITCH]=1;
1.284 + pswit[SQUOTE_SWITCH]=0;
1.285 + pswit[TYPO_SWITCH]=1;
1.286 + pswit[QPARA_SWITCH]=0;
1.287 + pswit[PARANOID_SWITCH]=1;
1.288 + pswit[LINE_END_SWITCH]=0;
1.289 + pswit[OVERVIEW_SWITCH]=0;
1.290 + pswit[STDOUT_SWITCH]=0;
1.291 + pswit[HEADER_SWITCH]=1;
1.292 + pswit[VERBOSE_SWITCH]=0;
1.293 + pswit[MARKUP_SWITCH]=0;
1.294 + pswit[USERTYPO_SWITCH]=0;
1.295 + pswit[DP_SWITCH]=0;
1.296 }
1.297 if (argc<MINARGS || argc>MAXARGS)
1.298 {
1.299 /* check number of args */
1.300 - proghelp();
1.301 - return 1;
1.302 + proghelp();
1.303 + return 1;
1.304 }
1.305 /* read in the user-defined stealth scanno list */
1.306 if (pswit[USERTYPO_SWITCH])
1.307 {
1.308 /* ... we were told we had one! */
1.309 - usertypofile=fopen(USERTYPO_FILE,"rb");
1.310 - if (!usertypofile)
1.311 + usertypofile=fopen(USERTYPO_FILE,"rb");
1.312 + if (!usertypofile)
1.313 {
1.314 /* not in cwd. try excuteable directory. */
1.315 - strcpy(usertypo_file,running_from);
1.316 - strcat(usertypo_file,USERTYPO_FILE);
1.317 - usertypofile=fopen(usertypo_file,"rb");
1.318 - if (!usertypofile) {
1.319 + strcpy(usertypo_file,running_from);
1.320 + strcat(usertypo_file,USERTYPO_FILE);
1.321 + usertypofile=fopen(usertypo_file,"rb");
1.322 + if (!usertypofile) {
1.323 /* we ain't got no user typo file! */
1.324 - printf(" --> I couldn't find gutcheck.typ "
1.325 + printf(" --> I couldn't find gutcheck.typ "
1.326 "-- proceeding without user typos.\n");
1.327 }
1.328 }
1.329 - usertypo_count=0;
1.330 - if (usertypofile)
1.331 + usertypo_count=0;
1.332 + if (usertypofile)
1.333 {
1.334 /* we managed to open a User Typo File! */
1.335 - if (pswit[USERTYPO_SWITCH])
1.336 + if (pswit[USERTYPO_SWITCH])
1.337 {
1.338 - while (flgets(aline,LINEBUFSIZE-1,usertypofile,
1.339 + while (flgets(aline,LINEBUFSIZE-1,usertypofile,
1.340 (long)usertypo_count))
1.341 {
1.342 - if (strlen(aline)>1)
1.343 + if (strlen(aline)>1)
1.344 {
1.345 - if ((int)*aline>33)
1.346 + if ((int)*aline>33)
1.347 {
1.348 - s=malloc(strlen(aline)+1);
1.349 - if (!s)
1.350 + s=malloc(strlen(aline)+1);
1.351 + if (!s)
1.352 {
1.353 - fprintf(stderr,"bookloupe: cannot get enough "
1.354 + fprintf(stderr,"bookloupe: cannot get enough "
1.355 "memory for user typo file!\n");
1.356 - exit(1);
1.357 + exit(1);
1.358 }
1.359 - strcpy(s,aline);
1.360 - usertypo[usertypo_count]=s;
1.361 - usertypo_count++;
1.362 - if (usertypo_count>=MAX_USER_TYPOS)
1.363 + strcpy(s,aline);
1.364 + usertypo[usertypo_count]=s;
1.365 + usertypo_count++;
1.366 + if (usertypo_count>=MAX_USER_TYPOS)
1.367 {
1.368 - printf(" --> Only %d user-defined typos "
1.369 + printf(" --> Only %d user-defined typos "
1.370 "allowed: ignoring the rest\n",
1.371 MAX_USER_TYPOS);
1.372 - break;
1.373 + break;
1.374 }
1.375 }
1.376 }
1.377 }
1.378 }
1.379 - fclose(usertypofile);
1.380 + fclose(usertypofile);
1.381 }
1.382 }
1.383 fprintf(stderr,"bookloupe: Check and report on an e-text\n");
1.384 @@ -513,47 +547,39 @@
1.385 {
1.386 printf(" Checked %ld lines of %ld (head+foot = %ld)\n\n",
1.387 checked_linecnt,linecnt,linecnt-checked_linecnt);
1.388 - printf(" --------------- Queries found --------------\n");
1.389 - if (cnt_long)
1.390 - printf(" Long lines: %14ld\n",cnt_long);
1.391 - if (cnt_short)
1.392 - printf(" Short lines: %14ld\n",cnt_short);
1.393 - if (cnt_lineend)
1.394 - printf(" Line-end problems: %14ld\n",cnt_lineend);
1.395 - if (cnt_word)
1.396 - printf(" Common typos: %14ld\n",cnt_word);
1.397 - if (cnt_dquot)
1.398 - printf(" Unmatched quotes: %14ld\n",cnt_dquot);
1.399 - if (cnt_squot)
1.400 - printf(" Unmatched SingleQuotes: %14ld\n",cnt_squot);
1.401 - if (cnt_brack)
1.402 - printf(" Unmatched brackets: %14ld\n",cnt_brack);
1.403 - if (cnt_bin)
1.404 - printf(" Non-ASCII characters: %14ld\n",cnt_bin);
1.405 - if (cnt_odd)
1.406 - printf(" Proofing characters: %14ld\n",cnt_odd);
1.407 - if (cnt_punct)
1.408 + printf(" --------------- Queries found --------------\n");
1.409 + if (cnt_long)
1.410 + printf(" Long lines: %14ld\n",cnt_long);
1.411 + if (cnt_short)
1.412 + printf(" Short lines: %14ld\n",cnt_short);
1.413 + if (cnt_lineend)
1.414 + printf(" Line-end problems: %14ld\n",cnt_lineend);
1.415 + if (cnt_word)
1.416 + printf(" Common typos: %14ld\n",cnt_word);
1.417 + if (cnt_dquot)
1.418 + printf(" Unmatched quotes: %14ld\n",cnt_dquot);
1.419 + if (cnt_squot)
1.420 + printf(" Unmatched SingleQuotes: %14ld\n",cnt_squot);
1.421 + if (cnt_brack)
1.422 + printf(" Unmatched brackets: %14ld\n",cnt_brack);
1.423 + if (cnt_bin)
1.424 + printf(" Non-ASCII characters: %14ld\n",cnt_bin);
1.425 + if (cnt_odd)
1.426 + printf(" Proofing characters: %14ld\n",cnt_odd);
1.427 + if (cnt_punct)
1.428 printf(" Punctuation & spacing queries: %14ld\n",cnt_punct);
1.429 - if (cnt_dash)
1.430 - printf(" Non-standard dashes: %14ld\n",cnt_dash);
1.431 - if (cnt_html)
1.432 - printf(" Possible HTML tags: %14ld\n",cnt_html);
1.433 - printf("\n");
1.434 - printf(" TOTAL QUERIES %14ld\n",
1.435 - cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+
1.436 - cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);
1.437 + if (cnt_dash)
1.438 + printf(" Non-standard dashes: %14ld\n",cnt_dash);
1.439 + if (cnt_html)
1.440 + printf(" Possible HTML tags: %14ld\n",cnt_html);
1.441 + printf("\n");
1.442 + printf(" TOTAL QUERIES %14ld\n",
1.443 + cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+
1.444 + cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);
1.445 }
1.446 return 0;
1.447 }
1.448
1.449 -struct first_pass_results {
1.450 - long firstline,astline;
1.451 - long footerline,totlen,binlen,alphalen,endquote_count,shortline,dotcomma;
1.452 - long fslashline,hyphens,longline,verylongline,htmcount,standalone_digit;
1.453 - long spacedash,emdash,space_emdash,non_PG_space_emdash,PG_space_emdash;
1.454 - signed int Dutchcount,Frenchcount;
1.455 -};
1.456 -
1.457 /*
1.458 * first_pass:
1.459 *
1.460 @@ -566,140 +592,135 @@
1.461 {
1.462 char laststart=CHAR_SPACE;
1.463 const char *s;
1.464 - signed int i,llen;
1.465 + int i,llen;
1.466 unsigned int lastlen=0,lastblen=0;
1.467 long spline=0,nspline=0;
1.468 static struct first_pass_results results={0};
1.469 char inword[MAXWORDLEN]="";
1.470 while (fgets(aline,LINEBUFSIZE-1,infile))
1.471 {
1.472 - while (aline[strlen(aline)-1]==10 || aline[strlen(aline)-1]==13)
1.473 + while (aline[strlen(aline)-1]==10 || aline[strlen(aline)-1]==13)
1.474 aline[strlen(aline)-1]=0;
1.475 - linecnt++;
1.476 - if (strstr(aline,"*END") && strstr(aline,"SMALL PRINT") &&
1.477 + linecnt++;
1.478 + if (strstr(aline,"*END") && strstr(aline,"SMALL PRINT") &&
1.479 (strstr(aline,"PUBLIC DOMAIN") || strstr(aline,"COPYRIGHT")))
1.480 {
1.481 - if (spline)
1.482 - printf(" --> Duplicate header?\n");
1.483 - spline=linecnt+1; /* first line of non-header text, that is */
1.484 + if (spline)
1.485 + printf(" --> Duplicate header?\n");
1.486 + spline=linecnt+1; /* first line of non-header text, that is */
1.487 }
1.488 - if (!strncmp(aline,"*** START",9) && strstr(aline,"PROJECT GUTENBERG"))
1.489 + if (!strncmp(aline,"*** START",9) && strstr(aline,"PROJECT GUTENBERG"))
1.490 {
1.491 - if (nspline)
1.492 - printf(" --> Duplicate header?\n");
1.493 - nspline=linecnt+1; /* first line of non-header text, that is */
1.494 + if (nspline)
1.495 + printf(" --> Duplicate header?\n");
1.496 + nspline=linecnt+1; /* first line of non-header text, that is */
1.497 }
1.498 - if (spline || nspline)
1.499 + if (spline || nspline)
1.500 {
1.501 - lowerit(aline);
1.502 - if (strstr(aline,"end") && strstr(aline,"project gutenberg"))
1.503 + lowerit(aline);
1.504 + if (strstr(aline,"end") && strstr(aline,"project gutenberg"))
1.505 {
1.506 - if (strstr(aline,"end")<strstr(aline,"project gutenberg"))
1.507 + if (strstr(aline,"end")<strstr(aline,"project gutenberg"))
1.508 {
1.509 - if (results.footerline)
1.510 + if (results.footerline)
1.511 {
1.512 /* it's an old-form header - we can detect duplicates */
1.513 - if (!nspline)
1.514 - printf(" --> Duplicate footer?\n");
1.515 + if (!nspline)
1.516 + printf(" --> Duplicate footer?\n");
1.517 }
1.518 - else
1.519 - results.footerline=linecnt;
1.520 + else
1.521 + results.footerline=linecnt;
1.522 }
1.523 }
1.524 }
1.525 - if (spline)
1.526 + if (spline)
1.527 results.firstline=spline;
1.528 - if (nspline)
1.529 + if (nspline)
1.530 results.firstline=nspline; /* override with new */
1.531 - if (results.footerline)
1.532 + if (results.footerline)
1.533 continue; /* don't count the boilerplate in the footer */
1.534 - llen=strlen(aline);
1.535 - results.totlen+=llen;
1.536 - for (i=0;i<llen;i++)
1.537 + llen=strlen(aline);
1.538 + results.totlen+=llen;
1.539 + for (i=0;i<llen;i++)
1.540 {
1.541 - if ((unsigned char)aline[i]>127)
1.542 + if ((unsigned char)aline[i]>127)
1.543 results.binlen++;
1.544 - if (gcisalpha(aline[i]))
1.545 + if (gcisalpha(aline[i]))
1.546 results.alphalen++;
1.547 - if (i>0 && aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))
1.548 + if (i>0 && aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))
1.549 results.endquote_count++;
1.550 }
1.551 - if (strlen(aline)>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE &&
1.552 + if (strlen(aline)>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE &&
1.553 lastblen>2 && lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)
1.554 results.shortline++;
1.555 - if (*aline && (unsigned char)aline[strlen(aline)-1]<=CHAR_SPACE)
1.556 + if (*aline && (unsigned char)aline[strlen(aline)-1]<=CHAR_SPACE)
1.557 cnt_spacend++;
1.558 - if (strstr(aline,".,"))
1.559 + if (strstr(aline,".,"))
1.560 results.dotcomma++;
1.561 - /* only count ast lines for ignoring purposes where there is */
1.562 - /* locase text on the line */
1.563 - if (strstr(aline,"*"))
1.564 + /* only count ast lines for ignoring purposes where there is */
1.565 + /* locase text on the line */
1.566 + if (strstr(aline,"*"))
1.567 {
1.568 - for (s=aline;*s;s++)
1.569 - if (*s>='a' && *s<='z')
1.570 - break;
1.571 - if (*s)
1.572 + for (s=aline;*s;s++)
1.573 + if (*s>='a' && *s<='z')
1.574 + break;
1.575 + if (*s)
1.576 results.astline++;
1.577 }
1.578 - if (strstr(aline,"/"))
1.579 - results.fslashline++;
1.580 - for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)
1.581 + if (strstr(aline,"/"))
1.582 + results.fslashline++;
1.583 + for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)
1.584 ;
1.585 - if (aline[i]=='-' && aline[i-1]!='-')
1.586 + if (aline[i]=='-' && aline[i-1]!='-')
1.587 results.hyphens++;
1.588 - if (llen>LONGEST_PG_LINE)
1.589 + if (llen>LONGEST_PG_LINE)
1.590 results.longline++;
1.591 - if (llen>WAY_TOO_LONG)
1.592 + if (llen>WAY_TOO_LONG)
1.593 results.verylongline++;
1.594 - if (strstr(aline,"<") && strstr(aline,">"))
1.595 + if (strstr(aline,"<") && strstr(aline,">"))
1.596 {
1.597 - i=(signed int)(strstr(aline,">")-strstr(aline,"<")+1);
1.598 - if (i>0)
1.599 - results.htmcount++;
1.600 - if (strstr(aline,"<i>"))
1.601 + i=(int)(strstr(aline,">")-strstr(aline,"<")+1);
1.602 + if (i>0)
1.603 + results.htmcount++;
1.604 + if (strstr(aline,"<i>"))
1.605 results.htmcount+=4; /* bonus marks! */
1.606 }
1.607 - /* Check for spaced em-dashes */
1.608 - if (strstr(aline,"--"))
1.609 + /* Check for spaced em-dashes */
1.610 + if (strstr(aline,"--"))
1.611 {
1.612 - results.emdash++;
1.613 - if (*(strstr(aline,"--")-1)==CHAR_SPACE ||
1.614 - (*(strstr(aline,"--")+2)==CHAR_SPACE))
1.615 + results.emdash++;
1.616 + if (*(strstr(aline,"--")-1)==CHAR_SPACE ||
1.617 + (*(strstr(aline,"--")+2)==CHAR_SPACE))
1.618 results.space_emdash++;
1.619 - if (*(strstr(aline,"--")-1)==CHAR_SPACE &&
1.620 - (*(strstr(aline,"--")+2)==CHAR_SPACE))
1.621 + if (*(strstr(aline,"--")-1)==CHAR_SPACE &&
1.622 + (*(strstr(aline,"--")+2)==CHAR_SPACE))
1.623 /* count of em-dashes with spaces both sides */
1.624 results.non_PG_space_emdash++;
1.625 - if (*(strstr(aline,"--")-1)!=CHAR_SPACE &&
1.626 - (*(strstr(aline,"--")+2)!=CHAR_SPACE))
1.627 + if (*(strstr(aline,"--")-1)!=CHAR_SPACE &&
1.628 + (*(strstr(aline,"--")+2)!=CHAR_SPACE))
1.629 /* count of PG-type em-dashes with no spaces */
1.630 results.PG_space_emdash++;
1.631 }
1.632 - for (s=aline;*s;)
1.633 + for (s=aline;*s;)
1.634 {
1.635 - s=getaword(s,inword);
1.636 - if (!strcmp(inword,"hij") || !strcmp(inword,"niet"))
1.637 - results.Dutchcount++;
1.638 - if (!strcmp(inword,"dans") || !strcmp(inword,"avec"))
1.639 - results.Frenchcount++;
1.640 - if (!strcmp(inword,"0") || !strcmp(inword,"1"))
1.641 - results.standalone_digit++;
1.642 + s=getaword(s,inword);
1.643 + if (!strcmp(inword,"hij") || !strcmp(inword,"niet"))
1.644 + results.Dutchcount++;
1.645 + if (!strcmp(inword,"dans") || !strcmp(inword,"avec"))
1.646 + results.Frenchcount++;
1.647 + if (!strcmp(inword,"0") || !strcmp(inword,"1"))
1.648 + results.standalone_digit++;
1.649 }
1.650 - /* Check for spaced dashes */
1.651 - if (strstr(aline," -") && *(strstr(aline," -")+2)!='-')
1.652 + /* Check for spaced dashes */
1.653 + if (strstr(aline," -") && *(strstr(aline," -")+2)!='-')
1.654 results.spacedash++;
1.655 - lastblen=lastlen;
1.656 - lastlen=strlen(aline);
1.657 - laststart=aline[0];
1.658 + lastblen=lastlen;
1.659 + lastlen=strlen(aline);
1.660 + laststart=aline[0];
1.661 }
1.662 return &results;
1.663 }
1.664
1.665 -struct warnings {
1.666 - signed int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen;
1.667 - signed int endquote,isDutch,isFrench;
1.668 -};
1.669 -
1.670 /*
1.671 * report_first_pass:
1.672 *
1.673 @@ -709,13 +730,13 @@
1.674 {
1.675 static struct warnings warnings={0};
1.676 if (cnt_spacend>0)
1.677 - printf(" --> %ld lines in this file have white space at end\n",
1.678 + printf(" --> %ld lines in this file have white space at end\n",
1.679 cnt_spacend);
1.680 warnings.dotcomma=1;
1.681 if (results->dotcomma>5)
1.682 {
1.683 - warnings.dotcomma=0;
1.684 - printf(" --> %ld lines in this file contain '.,'. "
1.685 + warnings.dotcomma=0;
1.686 + printf(" --> %ld lines in this file contain '.,'. "
1.687 "Not reporting them.\n",results->dotcomma);
1.688 }
1.689 /*
1.690 @@ -725,8 +746,8 @@
1.691 warnings.shortline=1;
1.692 if (results->shortline>50 || results->shortline*10>linecnt)
1.693 {
1.694 - warnings.shortline=0;
1.695 - printf(" --> %ld lines in this file are short. "
1.696 + warnings.shortline=0;
1.697 + printf(" --> %ld lines in this file are short. "
1.698 "Not reporting short lines.\n",results->shortline);
1.699 }
1.700 /*
1.701 @@ -736,16 +757,16 @@
1.702 warnings.longline=1;
1.703 if (results->longline>50 || results->longline*10>linecnt)
1.704 {
1.705 - warnings.longline=0;
1.706 - printf(" --> %ld lines in this file are long. "
1.707 + warnings.longline=0;
1.708 + printf(" --> %ld lines in this file are long. "
1.709 "Not reporting long lines.\n",results->longline);
1.710 }
1.711 /* If more than 10 lines contain asterisks, don't bother reporting them. */
1.712 warnings.ast=1;
1.713 if (results->astline>10)
1.714 {
1.715 - warnings.ast=0;
1.716 - printf(" --> %ld lines in this file contain asterisks. "
1.717 + warnings.ast=0;
1.718 + printf(" --> %ld lines in this file contain asterisks. "
1.719 "Not reporting them.\n",results->astline);
1.720 }
1.721 /*
1.722 @@ -755,8 +776,8 @@
1.723 warnings.fslash=1;
1.724 if (results->fslashline>10)
1.725 {
1.726 - warnings.fslash=0;
1.727 - printf(" --> %ld lines in this file contain forward slashes. "
1.728 + warnings.fslash=0;
1.729 + printf(" --> %ld lines in this file contain forward slashes. "
1.730 "Not reporting them.\n",results->fslashline);
1.731 }
1.732 /*
1.733 @@ -766,8 +787,8 @@
1.734 warnings.endquote=1;
1.735 if (results->endquote_count>20)
1.736 {
1.737 - warnings.endquote=0;
1.738 - printf(" --> %ld lines in this file contain unpunctuated endquotes. "
1.739 + warnings.endquote=0;
1.740 + printf(" --> %ld lines in this file contain unpunctuated endquotes. "
1.741 "Not reporting them.\n",results->endquote_count);
1.742 }
1.743 /*
1.744 @@ -777,8 +798,8 @@
1.745 warnings.digit=1;
1.746 if (results->standalone_digit>10)
1.747 {
1.748 - warnings.digit=0;
1.749 - printf(" --> %ld lines in this file contain standalone 0s and 1s. "
1.750 + warnings.digit=0;
1.751 + printf(" --> %ld lines in this file contain standalone 0s and 1s. "
1.752 "Not reporting them.\n",results->standalone_digit);
1.753 }
1.754 /*
1.755 @@ -788,17 +809,17 @@
1.756 warnings.hyphen=1;
1.757 if (results->hyphens>20)
1.758 {
1.759 - warnings.hyphen=0;
1.760 - printf(" --> %ld lines in this file have hyphens at end. "
1.761 + warnings.hyphen=0;
1.762 + printf(" --> %ld lines in this file have hyphens at end. "
1.763 "Not reporting them.\n",results->hyphens);
1.764 }
1.765 if (results->htmcount>20 && !pswit[MARKUP_SWITCH])
1.766 {
1.767 - printf(" --> Looks like this is HTML. Switching HTML mode ON.\n");
1.768 - pswit[MARKUP_SWITCH]=1;
1.769 + printf(" --> Looks like this is HTML. Switching HTML mode ON.\n");
1.770 + pswit[MARKUP_SWITCH]=1;
1.771 }
1.772 if (results->verylongline>0)
1.773 - printf(" --> %ld lines in this file are VERY long!\n",
1.774 + printf(" --> %ld lines in this file are VERY long!\n",
1.775 results->verylongline);
1.776 /*
1.777 * If there are more non-PG spaced dashes than PG em-dashes,
1.778 @@ -810,8 +831,8 @@
1.779 if (results->spacedash+results->non_PG_space_emdash>
1.780 results->PG_space_emdash)
1.781 {
1.782 - warnings.dash=0;
1.783 - printf(" --> There are %ld spaced dashes and em-dashes. "
1.784 + warnings.dash=0;
1.785 + printf(" --> There are %ld spaced dashes and em-dashes. "
1.786 "Not reporting them.\n",
1.787 results->spacedash+results->non_PG_space_emdash);
1.788 }
1.789 @@ -819,81 +840,75 @@
1.790 warnings.bin=1;
1.791 if (results->binlen*4>results->totlen)
1.792 {
1.793 - printf(" --> This file does not appear to be ASCII. "
1.794 + printf(" --> This file does not appear to be ASCII. "
1.795 "Terminating. Best of luck with it!\n");
1.796 - exit(1);
1.797 + exit(1);
1.798 }
1.799 if (results->alphalen*4<results->totlen)
1.800 {
1.801 - printf(" --> This file does not appear to be text. "
1.802 + printf(" --> This file does not appear to be text. "
1.803 "Terminating. Best of luck with it!\n");
1.804 - exit(1);
1.805 + exit(1);
1.806 }
1.807 if (results->binlen*100>results->totlen || results->binlen>100)
1.808 {
1.809 - printf(" --> There are a lot of foreign letters here. "
1.810 + printf(" --> There are a lot of foreign letters here. "
1.811 "Not reporting them.\n");
1.812 - warnings.bin=0;
1.813 + warnings.bin=0;
1.814 }
1.815 warnings.isDutch=0;
1.816 if (results->Dutchcount>50)
1.817 {
1.818 - warnings.isDutch=1;
1.819 - printf(" --> This looks like Dutch - "
1.820 + warnings.isDutch=1;
1.821 + printf(" --> This looks like Dutch - "
1.822 "switching off dashes and warnings for 's Middags case.\n");
1.823 }
1.824 warnings.isFrench=0;
1.825 if (results->Frenchcount>50)
1.826 {
1.827 - warnings.isFrench=1;
1.828 - printf(" --> This looks like French - "
1.829 + warnings.isFrench=1;
1.830 + printf(" --> This looks like French - "
1.831 "switching off some doublepunct.\n");
1.832 }
1.833 if (results->firstline && results->footerline)
1.834 - printf(" The PG header and footer appear to be already on.\n");
1.835 + printf(" The PG header and footer appear to be already on.\n");
1.836 else
1.837 {
1.838 - if (results->firstline)
1.839 - printf(" The PG header is on - no footer.\n");
1.840 - if (results->footerline)
1.841 - printf(" The PG footer is on - no header.\n");
1.842 + if (results->firstline)
1.843 + printf(" The PG header is on - no footer.\n");
1.844 + if (results->footerline)
1.845 + printf(" The PG footer is on - no header.\n");
1.846 }
1.847 printf("\n");
1.848 if (pswit[VERBOSE_SWITCH])
1.849 {
1.850 - warnings.bin=1;
1.851 - warnings.shortline=1;
1.852 - warnings.dotcomma=1;
1.853 - warnings.longline=1;
1.854 - warnings.dash=1;
1.855 - warnings.digit=1;
1.856 - warnings.ast=1;
1.857 - warnings.fslash=1;
1.858 - warnings.hyphen=1;
1.859 - warnings.endquote=1;
1.860 - printf(" *** Verbose output is ON -- you asked for it! ***\n");
1.861 + warnings.bin=1;
1.862 + warnings.shortline=1;
1.863 + warnings.dotcomma=1;
1.864 + warnings.longline=1;
1.865 + warnings.dash=1;
1.866 + warnings.digit=1;
1.867 + warnings.ast=1;
1.868 + warnings.fslash=1;
1.869 + warnings.hyphen=1;
1.870 + warnings.endquote=1;
1.871 + printf(" *** Verbose output is ON -- you asked for it! ***\n");
1.872 }
1.873 if (warnings.isDutch)
1.874 - warnings.dash=0;
1.875 + warnings.dash=0;
1.876 if (results->footerline>0 && results->firstline>0 &&
1.877 results->footerline>results->firstline &&
1.878 results->footerline-results->firstline<100)
1.879 {
1.880 - printf(" --> I don't really know where this text starts. \n");
1.881 - printf(" There are no reference points.\n");
1.882 - printf(" I'm going to have to report the header and footer "
1.883 + printf(" --> I don't really know where this text starts. \n");
1.884 + printf(" There are no reference points.\n");
1.885 + printf(" I'm going to have to report the header and footer "
1.886 "as well.\n");
1.887 - results->firstline=0;
1.888 + results->firstline=0;
1.889 }
1.890 return &warnings;
1.891 }
1.892
1.893 -struct counters {
1.894 - long quot;
1.895 - signed int c_unders,c_brack,s_brack,r_brack;
1.896 - signed int open_single_quote,close_single_quote;
1.897 -};
1.898 -
1.899 /*
1.900 * analyse_quotes:
1.901 *
1.902 @@ -908,7 +923,7 @@
1.903 */
1.904 int analyse_quotes(const char *s,struct counters *counters)
1.905 {
1.906 - signed int guessquote=0;
1.907 + int guessquote=0;
1.908 int isemptyline=1; /* assume the line is empty until proven otherwise */
1.909 while (*s)
1.910 {
1.911 @@ -1028,7 +1043,7 @@
1.912 int isemptyline)
1.913 {
1.914 /* Don't repeat multiple warnings on one line. */
1.915 - signed int eNon_A=0,eTab=0,eTilde=0,eCarat=0,eFSlash=0,eAst=0;
1.916 + int eNon_A=0,eTab=0,eTilde=0,eCarat=0,eFSlash=0,eAst=0;
1.917 const char *s;
1.918 unsigned char c;
1.919 for (s=aline;*s;s++)
1.920 @@ -1129,17 +1144,12 @@
1.921 printf("\n%s\n",aline);
1.922 if (!pswit[OVERVIEW_SWITCH])
1.923 printf(" Line %ld column %d - Long line %d\n",
1.924 - linecnt,strlen(aline),strlen(aline));
1.925 + linecnt,(int)strlen(aline),(int)strlen(aline));
1.926 else
1.927 cnt_long++;
1.928 }
1.929 }
1.930
1.931 -struct line_properties {
1.932 - unsigned int len,blen;
1.933 - char start;
1.934 -};
1.935 -
1.936 /*
1.937 * check_for_short_line:
1.938 *
1.939 @@ -1173,7 +1183,7 @@
1.940 printf("\n%s\n",prevline);
1.941 if (!pswit[OVERVIEW_SWITCH])
1.942 printf(" Line %ld column %d - Short line %d?\n",
1.943 - linecnt-1,strlen(prevline),strlen(prevline));
1.944 + linecnt-1,(int)strlen(prevline),(int)strlen(prevline));
1.945 else
1.946 cnt_short++;
1.947 }
1.948 @@ -1451,7 +1461,7 @@
1.949 void check_for_extra_period(const char *aline,const struct warnings *warnings)
1.950 {
1.951 const char *s,*t,*s1;
1.952 - signed int i,istypo,isdup;
1.953 + int i,istypo,isdup;
1.954 static char qperiod[MAX_QWORD][MAX_QWORD_LENGTH];
1.955 static int qperiod_index=0;
1.956 char testword[MAXWORDLEN]="";
1.957 @@ -1634,7 +1644,7 @@
1.958 istypo=0;
1.959 strcpy(testword,inword);
1.960 alower=0;
1.961 - for (i=0;i<(signed int)strlen(testword);i++)
1.962 + for (i=0;i<(int)strlen(testword);i++)
1.963 {
1.964 /* lowercase for testing */
1.965 if (testword[i]>='a' && testword[i]<='z')
1.966 @@ -1809,10 +1819,6 @@
1.967 }
1.968 }
1.969
1.970 -struct parities {
1.971 - int dquote,squote;
1.972 -};
1.973 -
1.974 /*
1.975 * check_for_misspaced_punctuation:
1.976 *
1.977 @@ -2312,7 +2318,7 @@
1.978 close=strstr(aline,">");
1.979 if (close)
1.980 {
1.981 - i=(signed int)(close-open+1);
1.982 + i=(int)(close-open+1);
1.983 if (i>0)
1.984 {
1.985 strncpy(wrk,open,i);
1.986 @@ -2350,7 +2356,7 @@
1.987 i=(int)(scolon-amp+1);
1.988 for (s=amp;s<scolon;s++)
1.989 if (*s==CHAR_SPACE)
1.990 - i=0; /* Don't report "Jones & Son;" */
1.991 + i=0; /* Don't report "Jones & Son;" */
1.992 if (i>0)
1.993 {
1.994 strncpy(wrk,amp,i);
1.995 @@ -2367,11 +2373,6 @@
1.996 }
1.997 }
1.998
1.999 -struct pending {
1.1000 - char dquote[80],squote[80],rbrack[80],sbrack[80],cbrack[80],unders[80];
1.1001 - long squot;
1.1002 -};
1.1003 -
1.1004 /*
1.1005 * print_pending:
1.1006 *
1.1007 @@ -2551,7 +2552,7 @@
1.1008 if (!pswit[OVERVIEW_SWITCH])
1.1009 printf(" Line %ld column %d - "
1.1010 "No punctuation at para end?\n",
1.1011 - linecnt-1,strlen(prevline));
1.1012 + linecnt-1,(int)strlen(prevline));
1.1013 else
1.1014 cnt_punct++;
1.1015 break;
1.1016 @@ -2579,22 +2580,18 @@
1.1017 struct parities parities={0};
1.1018 struct pending pending={{0},};
1.1019 int isemptyline;
1.1020 - long start_para_line;
1.1021 - signed int i,llen,isacro,isellipsis;
1.1022 - signed int isnewpara;
1.1023 - signed int enddash;
1.1024 + long start_para_line=0;
1.1025 + int i,isnewpara=0,enddash=0;
1.1026 last.start=CHAR_SPACE;
1.1027 *prevline=0;
1.1028 - linecnt=checked_linecnt=start_para_line=0;
1.1029 - i=llen=isacro=isellipsis=0;
1.1030 - isnewpara=enddash=0;
1.1031 + linecnt=checked_linecnt=0;
1.1032 infile=fopen(filename,"rb");
1.1033 if (!infile)
1.1034 {
1.1035 - if (pswit[STDOUT_SWITCH])
1.1036 - fprintf(stdout,"bookloupe: cannot open %s\n",filename);
1.1037 - else
1.1038 - fprintf(stderr,"bookloupe: cannot open %s\n",filename);
1.1039 + if (pswit[STDOUT_SWITCH])
1.1040 + fprintf(stdout,"bookloupe: cannot open %s\n",filename);
1.1041 + else
1.1042 + fprintf(stderr,"bookloupe: cannot open %s\n",filename);
1.1043 exit(1);
1.1044 }
1.1045 fprintf(stdout,"\n\nFile: %s\n\n",filename);
1.1046 @@ -2607,84 +2604,84 @@
1.1047 linecnt=0;
1.1048 while (flgets(aline,LINEBUFSIZE-1,infile,linecnt+1))
1.1049 {
1.1050 - linecnt++;
1.1051 - if (linecnt==1)
1.1052 + linecnt++;
1.1053 + if (linecnt==1)
1.1054 isnewpara=1;
1.1055 - if (pswit[DP_SWITCH] && !strncmp(aline,"-----File: ",11))
1.1056 + if (pswit[DP_SWITCH] && !strncmp(aline,"-----File: ",11))
1.1057 continue; // skip DP page separators completely
1.1058 - if (linecnt<first_pass_results->firstline ||
1.1059 + if (linecnt<first_pass_results->firstline ||
1.1060 (first_pass_results->footerline>0 &&
1.1061 linecnt>first_pass_results->footerline))
1.1062 {
1.1063 - if (pswit[HEADER_SWITCH])
1.1064 + if (pswit[HEADER_SWITCH])
1.1065 {
1.1066 - if (!strncmp(aline,"Title:",6))
1.1067 - printf(" %s\n",aline);
1.1068 - if (!strncmp(aline,"Author:",7))
1.1069 - printf(" %s\n",aline);
1.1070 - if (!strncmp(aline,"Release Date:",13))
1.1071 - printf(" %s\n",aline);
1.1072 - if (!strncmp(aline,"Edition:",8))
1.1073 - printf(" %s\n\n",aline);
1.1074 + if (!strncmp(aline,"Title:",6))
1.1075 + printf(" %s\n",aline);
1.1076 + if (!strncmp(aline,"Author:",7))
1.1077 + printf(" %s\n",aline);
1.1078 + if (!strncmp(aline,"Release Date:",13))
1.1079 + printf(" %s\n",aline);
1.1080 + if (!strncmp(aline,"Edition:",8))
1.1081 + printf(" %s\n\n",aline);
1.1082 }
1.1083 - continue; /* skip through the header */
1.1084 + continue; /* skip through the header */
1.1085 }
1.1086 - checked_linecnt++;
1.1087 + checked_linecnt++;
1.1088 print_pending(aline,parastart,&pending);
1.1089 memset(&pending,0,sizeof(pending));
1.1090 isemptyline=analyse_quotes(aline,&counters);
1.1091 - if (isnewpara && !isemptyline)
1.1092 + if (isnewpara && !isemptyline)
1.1093 {
1.1094 /* This line is the start of a new paragraph. */
1.1095 - start_para_line=linecnt;
1.1096 + start_para_line=linecnt;
1.1097 /* Capture its first line in case we want to report it later. */
1.1098 - strncpy(parastart,aline,80);
1.1099 - parastart[79]=0;
1.1100 + strncpy(parastart,aline,80);
1.1101 + parastart[79]=0;
1.1102 memset(&parities,0,sizeof(parities)); /* restart the quote count */
1.1103 - s=aline;
1.1104 - while (!gcisalpha(*s) && !gcisdigit(*s) && *s)
1.1105 + s=aline;
1.1106 + while (!gcisalpha(*s) && !gcisdigit(*s) && *s)
1.1107 s++;
1.1108 - if (*s>='a' && *s<='z')
1.1109 + if (*s>='a' && *s<='z')
1.1110 {
1.1111 /* and its first letter is lowercase */
1.1112 - if (pswit[ECHO_SWITCH])
1.1113 + if (pswit[ECHO_SWITCH])
1.1114 printf("\n%s\n",aline);
1.1115 - if (!pswit[OVERVIEW_SWITCH])
1.1116 - printf(" Line %ld column %d - "
1.1117 + if (!pswit[OVERVIEW_SWITCH])
1.1118 + printf(" Line %ld column %d - "
1.1119 "Paragraph starts with lower-case\n",
1.1120 linecnt,(int)(s-aline)+1);
1.1121 - else
1.1122 - cnt_punct++;
1.1123 + else
1.1124 + cnt_punct++;
1.1125 }
1.1126 - isnewpara=0; /* Signal the end of new para processing. */
1.1127 + isnewpara=0; /* Signal the end of new para processing. */
1.1128 }
1.1129 - /* Check for an em-dash broken at line end. */
1.1130 - if (enddash && *aline=='-')
1.1131 + /* Check for an em-dash broken at line end. */
1.1132 + if (enddash && *aline=='-')
1.1133 {
1.1134 - if (pswit[ECHO_SWITCH])
1.1135 + if (pswit[ECHO_SWITCH])
1.1136 printf("\n%s\n",aline);
1.1137 - if (!pswit[OVERVIEW_SWITCH])
1.1138 - printf(" Line %ld column 1 - Broken em-dash?\n",linecnt);
1.1139 - else
1.1140 - cnt_punct++;
1.1141 + if (!pswit[OVERVIEW_SWITCH])
1.1142 + printf(" Line %ld column 1 - Broken em-dash?\n",linecnt);
1.1143 + else
1.1144 + cnt_punct++;
1.1145 }
1.1146 - enddash=0;
1.1147 - for (s=aline+strlen(aline)-1;*s==' ' && s>aline;s--)
1.1148 + enddash=0;
1.1149 + for (s=aline+strlen(aline)-1;*s==' ' && s>aline;s--)
1.1150 ;
1.1151 - if (s>=aline && *s=='-')
1.1152 - enddash=1;
1.1153 + if (s>=aline && *s=='-')
1.1154 + enddash=1;
1.1155 check_for_control_characters(aline);
1.1156 - if (warnings->bin)
1.1157 + if (warnings->bin)
1.1158 check_for_odd_characters(aline,warnings,isemptyline);
1.1159 - if (warnings->longline)
1.1160 + if (warnings->longline)
1.1161 check_for_long_line(aline);
1.1162 - if (warnings->shortline)
1.1163 + if (warnings->shortline)
1.1164 check_for_short_line(aline,&last);
1.1165 - last.blen=last.len;
1.1166 - last.len=strlen(aline);
1.1167 - last.start=aline[0];
1.1168 + last.blen=last.len;
1.1169 + last.len=strlen(aline);
1.1170 + last.start=aline[0];
1.1171 check_for_starting_punctuation(aline);
1.1172 - if (warnings->dash)
1.1173 + if (warnings->dash)
1.1174 {
1.1175 check_for_spaced_emdash(aline);
1.1176 check_for_spaced_dash(aline);
1.1177 @@ -2703,25 +2700,25 @@
1.1178 check_for_miscased_genative(aline);
1.1179 check_end_of_line(aline,warnings);
1.1180 check_for_unspaced_bracket(aline);
1.1181 - if (warnings->endquote)
1.1182 + if (warnings->endquote)
1.1183 check_for_unpunctuated_endquote(aline);
1.1184 check_for_html_tag(aline);
1.1185 check_for_html_entity(aline);
1.1186 - if (isemptyline)
1.1187 + if (isemptyline)
1.1188 {
1.1189 check_for_mismatched_quotes(&counters,&pending);
1.1190 memset(&counters,0,sizeof(counters));
1.1191 /* let the next iteration know that it's starting a new para */
1.1192 - isnewpara=1;
1.1193 + isnewpara=1;
1.1194 check_for_omitted_punctuation(prevline,&last,start_para_line);
1.1195 }
1.1196 - strcpy(prevline,aline);
1.1197 + strcpy(prevline,aline);
1.1198 }
1.1199 fclose(infile);
1.1200 if (!pswit[OVERVIEW_SWITCH])
1.1201 - for (i=0;i<MAX_QWORD;i++)
1.1202 - if (dupcnt[i])
1.1203 - printf("\nNote: Queried word %s was duplicated %d time%s\n",
1.1204 + for (i=0;i<MAX_QWORD;i++)
1.1205 + if (dupcnt[i])
1.1206 + printf("\nNote: Queried word %s was duplicated %d time%s\n",
1.1207 qword[i],dupcnt[i],"s");
1.1208 }
1.1209
1.1210 @@ -2742,68 +2739,68 @@
1.1211 c=cint=fgetc(thefile);
1.1212 do
1.1213 {
1.1214 - if (cint==EOF)
1.1215 - return NULL;
1.1216 + if (cint==EOF)
1.1217 + return NULL;
1.1218 /* either way, it's end of line */
1.1219 - if (c==10)
1.1220 + if (c==10)
1.1221 {
1.1222 - if (isCR)
1.1223 - break;
1.1224 - else
1.1225 + if (isCR)
1.1226 + break;
1.1227 + else
1.1228 {
1.1229 /* Error - a LF without a preceding CR */
1.1230 - if (pswit[LINE_END_SWITCH])
1.1231 + if (pswit[LINE_END_SWITCH])
1.1232 {
1.1233 - if (pswit[ECHO_SWITCH])
1.1234 + if (pswit[ECHO_SWITCH])
1.1235 printf("\n%s\n",theline);
1.1236 - if (!pswit[OVERVIEW_SWITCH])
1.1237 - printf(" Line %ld - No CR?\n",lcnt);
1.1238 - else
1.1239 - cnt_lineend++;
1.1240 + if (!pswit[OVERVIEW_SWITCH])
1.1241 + printf(" Line %ld - No CR?\n",lcnt);
1.1242 + else
1.1243 + cnt_lineend++;
1.1244 }
1.1245 - break;
1.1246 + break;
1.1247 }
1.1248 }
1.1249 - if (c==13)
1.1250 + if (c==13)
1.1251 {
1.1252 - if (isCR)
1.1253 + if (isCR)
1.1254 {
1.1255 /* Error - two successive CRs */
1.1256 - if (pswit[LINE_END_SWITCH])
1.1257 + if (pswit[LINE_END_SWITCH])
1.1258 {
1.1259 - if (pswit[ECHO_SWITCH])
1.1260 + if (pswit[ECHO_SWITCH])
1.1261 printf("\n%s\n",theline);
1.1262 - if (!pswit[OVERVIEW_SWITCH])
1.1263 - printf(" Line %ld - Two successive CRs?\n",lcnt);
1.1264 - else
1.1265 - cnt_lineend++;
1.1266 + if (!pswit[OVERVIEW_SWITCH])
1.1267 + printf(" Line %ld - Two successive CRs?\n",lcnt);
1.1268 + else
1.1269 + cnt_lineend++;
1.1270 }
1.1271 }
1.1272 - isCR=1;
1.1273 + isCR=1;
1.1274 }
1.1275 - else
1.1276 + else
1.1277 {
1.1278 - if (pswit[LINE_END_SWITCH] && isCR)
1.1279 + if (pswit[LINE_END_SWITCH] && isCR)
1.1280 {
1.1281 - if (pswit[ECHO_SWITCH])
1.1282 + if (pswit[ECHO_SWITCH])
1.1283 printf("\n%s\n",theline);
1.1284 - if (!pswit[OVERVIEW_SWITCH])
1.1285 - printf(" Line %ld column %d - CR without LF?\n",
1.1286 + if (!pswit[OVERVIEW_SWITCH])
1.1287 + printf(" Line %ld column %d - CR without LF?\n",
1.1288 lcnt,len+1);
1.1289 - else
1.1290 - cnt_lineend++;
1.1291 + else
1.1292 + cnt_lineend++;
1.1293 }
1.1294 - theline[len]=c;
1.1295 - len++;
1.1296 - theline[len]=0;
1.1297 - isCR=0;
1.1298 + theline[len]=c;
1.1299 + len++;
1.1300 + theline[len]=0;
1.1301 + isCR=0;
1.1302 }
1.1303 - c=cint=fgetc(thefile);
1.1304 + c=cint=fgetc(thefile);
1.1305 } while(len<maxlen);
1.1306 if (pswit[MARKUP_SWITCH])
1.1307 - postprocess_for_HTML(theline);
1.1308 + postprocess_for_HTML(theline);
1.1309 if (pswit[DP_SWITCH])
1.1310 - postprocess_for_DP(theline);
1.1311 + postprocess_for_DP(theline);
1.1312 return theline;
1.1313 }
1.1314
1.1315 @@ -2822,43 +2819,43 @@
1.1316 char *s;
1.1317 wehaveadigit=wehavealetter=query=0;
1.1318 for (s=checkword;*s;s++)
1.1319 - if (gcisalpha(*s))
1.1320 - wehavealetter=1;
1.1321 - else
1.1322 - if (gcisdigit(*s))
1.1323 - wehaveadigit=1;
1.1324 + if (gcisalpha(*s))
1.1325 + wehavealetter=1;
1.1326 + else
1.1327 + if (gcisdigit(*s))
1.1328 + wehaveadigit=1;
1.1329 if (wehaveadigit && wehavealetter)
1.1330 {
1.1331 /* Now exclude common legit cases, like "21st" and "12l. 3s. 11d." */
1.1332 - query=1;
1.1333 - wl=strlen(checkword);
1.1334 - for (firstdigits=0;gcisdigit(checkword[firstdigits]);firstdigits++)
1.1335 - ;
1.1336 - /* digits, ending in st, rd, nd, th of either case */
1.1337 - if (firstdigits+2==wl && (matchword(checkword+wl-2,"st") ||
1.1338 + query=1;
1.1339 + wl=strlen(checkword);
1.1340 + for (firstdigits=0;gcisdigit(checkword[firstdigits]);firstdigits++)
1.1341 + ;
1.1342 + /* digits, ending in st, rd, nd, th of either case */
1.1343 + if (firstdigits+2==wl && (matchword(checkword+wl-2,"st") ||
1.1344 matchword(checkword+wl-2,"rd") || matchword(checkword+wl-2,"nd") ||
1.1345 matchword(checkword+wl-2,"th")))
1.1346 query=0;
1.1347 - if (firstdigits+3==wl && (matchword(checkword+wl-3,"sts") ||
1.1348 + if (firstdigits+3==wl && (matchword(checkword+wl-3,"sts") ||
1.1349 matchword(checkword+wl-3,"rds") || matchword(checkword+wl-3,"nds") ||
1.1350 matchword(checkword+wl-3,"ths")))
1.1351 query=0;
1.1352 - if (firstdigits+3==wl && (matchword(checkword+wl-4,"stly") ||
1.1353 + if (firstdigits+3==wl && (matchword(checkword+wl-4,"stly") ||
1.1354 matchword(checkword+wl-4,"rdly") ||
1.1355 matchword(checkword+wl-4,"ndly") || matchword(checkword+wl-4,"thly")))
1.1356 query=0;
1.1357 - /* digits, ending in l, L, s or d */
1.1358 - if (firstdigits+1==wl && (checkword[wl-1]=='l' ||
1.1359 + /* digits, ending in l, L, s or d */
1.1360 + if (firstdigits+1==wl && (checkword[wl-1]=='l' ||
1.1361 checkword[wl-1]=='L' || checkword[wl-1]=='s' || checkword[wl-1]=='d'))
1.1362 query=0;
1.1363 - /*
1.1364 + /*
1.1365 * L at the start of a number, representing Britsh pounds, like L500.
1.1366 - * This is cute. We know the current word is mixeddigit. If the first
1.1367 - * letter is L, there must be at least one digit following. If both
1.1368 - * digits and letters follow, we have a genuine error, else we have a
1.1369 - * capital L followed by digits, and we accept that as a non-error.
1.1370 + * This is cute. We know the current word is mixeddigit. If the first
1.1371 + * letter is L, there must be at least one digit following. If both
1.1372 + * digits and letters follow, we have a genuine error, else we have a
1.1373 + * capital L followed by digits, and we accept that as a non-error.
1.1374 */
1.1375 - if (checkword[0]=='L' && !mixdigit(checkword+1))
1.1376 + if (checkword[0]=='L' && !mixdigit(checkword+1))
1.1377 query=0;
1.1378 }
1.1379 return query;
1.1380 @@ -2872,7 +2869,7 @@
1.1381 * at least that's the aim.
1.1382 *
1.1383 * Returns: a pointer to the position in the line where we will start
1.1384 - * looking for the next word.
1.1385 + * looking for the next word.
1.1386 */
1.1387 const char *getaword(const char *fromline,char *thisword)
1.1388 {
1.1389 @@ -2895,17 +2892,17 @@
1.1390 wordlen<MAXWORDLEN;s++)
1.1391 {
1.1392 thisword[wordlen]=*s;
1.1393 - wordlen++;
1.1394 + wordlen++;
1.1395 }
1.1396 thisword[wordlen]=0;
1.1397 for (i=1;i<wordlen-1;i++)
1.1398 {
1.1399 - if (thisword[i]=='.' || thisword[i]==',')
1.1400 + if (thisword[i]=='.' || thisword[i]==',')
1.1401 {
1.1402 - if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i-1]))
1.1403 + if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i-1]))
1.1404 {
1.1405 - fromline=s;
1.1406 - return fromline;
1.1407 + fromline=s;
1.1408 + return fromline;
1.1409 }
1.1410 }
1.1411 }
1.1412 @@ -2914,8 +2911,8 @@
1.1413 for (;(gcisdigit(*fromline) || gcisalpha(*fromline) || *fromline=='\'') &&
1.1414 wordlen<MAXWORDLEN;fromline++)
1.1415 {
1.1416 - thisword[wordlen]=*fromline;
1.1417 - wordlen++;
1.1418 + thisword[wordlen]=*fromline;
1.1419 + wordlen++;
1.1420 }
1.1421 thisword[wordlen]=0;
1.1422 return fromline;
1.1423 @@ -2933,8 +2930,8 @@
1.1424 return 0;
1.1425 ismatch=1; /* assume a match until we find a difference */
1.1426 for (i=0;i<strlen(checkfor);i++)
1.1427 - if (toupper(checkfor[i])!=toupper(thisword[i]))
1.1428 - ismatch=0;
1.1429 + if (toupper(checkfor[i])!=toupper(thisword[i]))
1.1430 + ismatch=0;
1.1431 return ismatch;
1.1432 }
1.1433
1.1434 @@ -2943,12 +2940,11 @@
1.1435 *
1.1436 * Lowercase the line.
1.1437 */
1.1438 -
1.1439 void lowerit(char *theline)
1.1440 {
1.1441 for (;*theline;theline++)
1.1442 - if (*theline>='A' && *theline<='Z')
1.1443 - *theline+=32;
1.1444 + if (*theline>='A' && *theline<='Z')
1.1445 + *theline+=32;
1.1446 }
1.1447
1.1448 /*
1.1449 @@ -3071,21 +3067,21 @@
1.1450 char *s,*t;
1.1451 int i;
1.1452 if (!*theline)
1.1453 - return;
1.1454 + return;
1.1455 for (i=0;*DPmarkup[i];i++)
1.1456 {
1.1457 - s=strstr(theline,DPmarkup[i]);
1.1458 - while (s)
1.1459 + s=strstr(theline,DPmarkup[i]);
1.1460 + while (s)
1.1461 {
1.1462 - t=s+strlen(DPmarkup[i]);
1.1463 - while (*t)
1.1464 + t=s+strlen(DPmarkup[i]);
1.1465 + while (*t)
1.1466 {
1.1467 - *s=*t;
1.1468 - t++;
1.1469 + *s=*t;
1.1470 + t++;
1.1471 s++;
1.1472 }
1.1473 - *s=0;
1.1474 - s=strstr(theline,DPmarkup[i]);
1.1475 + *s=0;
1.1476 + s=strstr(theline,DPmarkup[i]);
1.1477 }
1.1478 }
1.1479 }
1.1480 @@ -3102,10 +3098,10 @@
1.1481 void postprocess_for_HTML(char *theline)
1.1482 {
1.1483 if (strstr(theline,"<") && strstr(theline,">"))
1.1484 - while (losemarkup(theline))
1.1485 - ;
1.1486 + while (losemarkup(theline))
1.1487 + ;
1.1488 while (loseentities(theline))
1.1489 - ;
1.1490 + ;
1.1491 }
1.1492
1.1493 char *losemarkup(char *theline)
1.1494 @@ -3113,25 +3109,25 @@
1.1495 char *s,*t;
1.1496 int i;
1.1497 if (!*theline)
1.1498 - return NULL;
1.1499 + return NULL;
1.1500 s=strstr(theline,"<");
1.1501 t=strstr(theline,">");
1.1502 if (!s || !t)
1.1503 return NULL;
1.1504 for (i=0;*markup[i];i++)
1.1505 - if (!tagcomp(s+1,markup[i]))
1.1506 + if (!tagcomp(s+1,markup[i]))
1.1507 {
1.1508 - if (!t[1])
1.1509 + if (!t[1])
1.1510 {
1.1511 - *s=0;
1.1512 - return s;
1.1513 + *s=0;
1.1514 + return s;
1.1515 }
1.1516 - else if (t>s)
1.1517 + else if (t>s)
1.1518 {
1.1519 strcpy(s,t+1);
1.1520 return s;
1.1521 }
1.1522 - }
1.1523 + }
1.1524 /* It's an unrecognized <xxx>. */
1.1525 return NULL;
1.1526 }
1.1527 @@ -3141,35 +3137,35 @@
1.1528 int i;
1.1529 char *s,*t;
1.1530 if (!*theline)
1.1531 - return NULL;
1.1532 + return NULL;
1.1533 for (i=0;*entities[i].htmlent;i++)
1.1534 {
1.1535 - s=strstr(theline,entities[i].htmlent);
1.1536 - if (s)
1.1537 + s=strstr(theline,entities[i].htmlent);
1.1538 + if (s)
1.1539 {
1.1540 - t=malloc((size_t)strlen(s));
1.1541 - if (!t)
1.1542 + t=malloc((size_t)strlen(s));
1.1543 + if (!t)
1.1544 return NULL;
1.1545 - strcpy(t,s+strlen(entities[i].htmlent));
1.1546 - strcpy(s,entities[i].textent);
1.1547 - strcat(s,t);
1.1548 - free(t);
1.1549 - return theline;
1.1550 + strcpy(t,s+strlen(entities[i].htmlent));
1.1551 + strcpy(s,entities[i].textent);
1.1552 + strcat(s,t);
1.1553 + free(t);
1.1554 + return theline;
1.1555 }
1.1556 }
1.1557 for (i=0;*entities[i].htmlnum;i++)
1.1558 {
1.1559 - s=strstr(theline,entities[i].htmlnum);
1.1560 - if (s)
1.1561 + s=strstr(theline,entities[i].htmlnum);
1.1562 + if (s)
1.1563 {
1.1564 - t=malloc((size_t)strlen(s));
1.1565 - if (!t)
1.1566 + t=malloc((size_t)strlen(s));
1.1567 + if (!t)
1.1568 return NULL;
1.1569 - strcpy(t,s+strlen(entities[i].htmlnum));
1.1570 - strcpy(s,entities[i].textent);
1.1571 - strcat(s,t);
1.1572 - free(t);
1.1573 - return theline;
1.1574 + strcpy(t,s+strlen(entities[i].htmlnum));
1.1575 + strcpy(s,entities[i].textent);
1.1576 + strcat(s,t);
1.1577 + free(t);
1.1578 + return theline;
1.1579 }
1.1580 }
1.1581 return NULL;
1.1582 @@ -3184,9 +3180,9 @@
1.1583 t++; /* ignore a slash */
1.1584 while (*s && *t)
1.1585 {
1.1586 - if (tolower(*s)!=tolower(*t))
1.1587 + if (tolower(*s)!=tolower(*t))
1.1588 return 1;
1.1589 - s++;
1.1590 + s++;
1.1591 t++;
1.1592 }
1.1593 return 0;