# HG changeset patch # User ali # Date 1369641784 -3600 # Node ID adb087007d082cc23390b68107f47584f937668e # Parent 8650633521467843eec6cb278103e9599084c10e Cleanup diff -r 865063352146 -r adb087007d08 bookloupe/bookloupe.c --- a/bookloupe/bookloupe.c Sun May 26 22:43:45 2013 +0100 +++ b/bookloupe/bookloupe.c Mon May 27 09:03:04 2013 +0100 @@ -1,21 +1,21 @@ /*************************************************************************/ /* bookloupe--check for assorted weirdnesses in a PG candidate text file */ -/* */ -/* Copyright 2000-2005 Jim Tinsley */ -/* Copyright 2012- J. Ali Harlow */ -/* */ +/* */ +/* Copyright 2000-2005 Jim Tinsley */ +/* Copyright 2012- J. Ali Harlow */ +/* */ /* This program is free software; you can redistribute it and/or modify */ /* it under the terms of the GNU General Public License as published by */ /* the Free Software Foundation; either version 2 of the License, or */ -/* (at your option) any later version. */ -/* */ +/* (at your option) any later version. */ +/* */ /* This program is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ -/* GNU General Public License for more details. */ -/* */ -/* You should have received a copy of the GNU General Public License */ -/* along with this program. If not, see . */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program. If not, see . */ /*************************************************************************/ #include @@ -23,8 +23,8 @@ #include #include -#define MAXWORDLEN 80 /* max length of one word */ -#define LINEBUFSIZE 2048 /* buffer size for an input line */ +#define MAXWORDLEN 80 /* max length of one word */ +#define LINEBUFSIZE 2048 /* buffer size for an input line */ #define MAX_USER_TYPOS 1000 #define USERTYPO_FILE "gutcheck.typ" @@ -257,17 +257,17 @@ }; /* special characters */ -#define CHAR_SPACE 32 -#define CHAR_TAB 9 -#define CHAR_LF 10 -#define CHAR_CR 13 -#define CHAR_DQUOTE 34 -#define CHAR_SQUOTE 39 +#define CHAR_SPACE 32 +#define CHAR_TAB 9 +#define CHAR_LF 10 +#define CHAR_CR 13 +#define CHAR_DQUOTE 34 +#define CHAR_SQUOTE 39 #define CHAR_OPEN_SQUOTE 96 -#define CHAR_TILDE 126 -#define CHAR_ASTERISK 42 -#define CHAR_FORESLASH 47 -#define CHAR_CARAT 94 +#define CHAR_TILDE 126 +#define CHAR_ASTERISK 42 +#define CHAR_FORESLASH 47 +#define CHAR_CARAT 94 #define CHAR_UNDERSCORE '_' #define CHAR_OPEN_CBRACK '{' @@ -282,31 +282,31 @@ #define WAY_TOO_LONG 80 #define SHORTEST_PG_LINE 55 -#define SWITCHES "ESTPXLOYHWVMUD" /* switches:- */ - /* D - ignore DP-specific markup */ - /* E - echo queried line */ - /* S - check single quotes */ - /* T - check common typos */ - /* P - require closure of quotes on */ - /* every paragraph */ - /* X - "Trust no one" :-) Paranoid! */ - /* Queries everything */ - /* L - line end checking defaults on */ - /* -L turns it off */ - /* O - overview. Just shows counts. */ - /* Y - puts errors to stdout */ - /* instead of stderr */ - /* H - Echoes header fields */ - /* M - Ignore markup in < > */ - /* U - Use file of User-defined Typos*/ - /* W - Defaults for use on Web upload*/ - /* V - Verbose - list EVERYTHING! */ -#define SWITNO 14 /* max number of switch parms */ - /* - used for defining array-size */ -#define MINARGS 1 /* minimum no of args excl switches */ -#define MAXARGS 1 /* maximum no of args excl switches */ +#define SWITCHES "ESTPXLOYHWVMUD" /* switches:- */ + /* D - ignore DP-specific markup */ + /* E - echo queried line */ + /* S - check single quotes */ + /* T - check common typos */ + /* P - require closure of quotes on */ + /* every paragraph */ + /* X - "Trust no one" :-) Paranoid! */ + /* Queries everything */ + /* L - line end checking defaults on */ + /* -L turns it off */ + /* O - overview. Just shows counts. */ + /* Y - puts errors to stdout */ + /* instead of stderr */ + /* H - Echoes header fields */ + /* M - Ignore markup in < > */ + /* U - Use file of User-defined Typos */ + /* W - Defaults for use on Web upload */ + /* V - Verbose - list EVERYTHING! */ +#define SWITNO 14 /* max number of switch parms */ + /* - used for defining array-size */ +#define MINARGS 1 /* minimum no of args excl switches */ +#define MAXARGS 1 /* maximum no of args excl switches */ -int pswit[SWITNO]; /* program switches set by SWITCHES */ +int pswit[SWITNO]; /* program switches set by SWITCHES */ #define ECHO_SWITCH 0 #define SQUOTE_SWITCH 1 @@ -321,23 +321,24 @@ #define VERBOSE_SWITCH 10 #define MARKUP_SWITCH 11 #define USERTYPO_SWITCH 12 -#define DP_SWITCH 13 +#define DP_SWITCH 13 -long cnt_dquot; /* for overview mode, count of doublequote queries */ -long cnt_squot; /* for overview mode, count of singlequote queries */ -long cnt_brack; /* for overview mode, count of brackets queries */ -long cnt_bin; /* for overview mode, count of non-ASCII queries */ -long cnt_odd; /* for overview mode, count of odd character queries */ -long cnt_long; /* for overview mode, count of long line errors */ -long cnt_short; /* for overview mode, count of short line queries */ -long cnt_punct; /* for overview mode, count of punctuation and spacing queries */ -long cnt_dash; /* for overview mode, count of dash-related queries */ -long cnt_word; /* for overview mode, count of word queries */ -long cnt_html; /* for overview mode, count of html queries */ -long cnt_lineend; /* for overview mode, count of line-end queries */ -long cnt_spacend; /* count of lines with space at end */ -long linecnt; /* count of total lines in the file */ -long checked_linecnt; /* count of lines actually checked */ +long cnt_dquot; /* for overview mode, count of doublequote queries */ +long cnt_squot; /* for overview mode, count of singlequote queries */ +long cnt_brack; /* for overview mode, count of brackets queries */ +long cnt_bin; /* for overview mode, count of non-ASCII queries */ +long cnt_odd; /* for overview mode, count of odd character queries */ +long cnt_long; /* for overview mode, count of long line errors */ +long cnt_short; /* for overview mode, count of short line queries */ +long cnt_punct; /* for overview mode, + count of punctuation and spacing queries */ +long cnt_dash; /* for overview mode, count of dash-related queries */ +long cnt_word; /* for overview mode, count of word queries */ +long cnt_html; /* for overview mode, count of html queries */ +long cnt_lineend; /* for overview mode, count of line-end queries */ +long cnt_spacend; /* count of lines with space at end */ +long linecnt; /* count of total lines in the file */ +long checked_linecnt; /* count of lines actually checked */ void proghelp(void); void procfile(char *); @@ -379,7 +380,40 @@ #define MAX_QWORD 50 #define MAX_QWORD_LENGTH 40 char qword[MAX_QWORD][MAX_QWORD_LENGTH]; -signed int dupcnt[MAX_QWORD]; +int dupcnt[MAX_QWORD]; + +struct first_pass_results { + long firstline,astline; + long footerline,totlen,binlen,alphalen,endquote_count,shortline,dotcomma; + long fslashline,hyphens,longline,verylongline,htmcount,standalone_digit; + long spacedash,emdash,space_emdash,non_PG_space_emdash,PG_space_emdash; + int Dutchcount,Frenchcount; +}; + +struct warnings { + int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen; + int endquote,isDutch,isFrench; +}; + +struct counters { + long quot; + int c_unders,c_brack,s_brack,r_brack; + int open_single_quote,close_single_quote; +}; + +struct line_properties { + unsigned int len,blen; + char start; +}; + +struct parities { + int dquote,squote; +}; + +struct pending { + char dquote[80],squote[80],rbrack[80],sbrack[80],cbrack[80],unders[80]; + long squot; +}; int main(int argc,char **argv) { @@ -389,14 +423,14 @@ FILE *usertypofile; if (strlen(argv[0])=running_from;s--) - *s=0; + *s=0; switno=strlen(SWITCHES); for (i=switno;--i>0;) - pswit[i]=0; /* initialise switches */ + pswit[i]=0; /* initialise switches */ /* * Standard loop to extract switches. * When we come out of this loop, the arguments will be @@ -404,25 +438,25 @@ * represented by their equivalent elements in pswit[] */ while (--argc>0 && **++argv=='-') - for (argsw=argv[0]+1;*argsw!='\0';argsw++) - for (i=switno,invarg=1;(--i>=0) && invarg==1;) - if ((toupper(*argsw))==SWITCHES[i]) + for (argsw=argv[0]+1;*argsw!='\0';argsw++) + for (i=switno,invarg=1;(--i>=0) && invarg==1;) + if ((toupper(*argsw))==SWITCHES[i]) { - invarg=0; - pswit[i]=1; + invarg=0; + pswit[i]=1; } /* Paranoid checking is turned OFF, not on, by its switch */ pswit[PARANOID_SWITCH]^=1; if (pswit[PARANOID_SWITCH]) /* if running in paranoid mode force typo checks as well */ - pswit[TYPO_SWITCH]=pswit[TYPO_SWITCH]^1; + pswit[TYPO_SWITCH]=pswit[TYPO_SWITCH]^1; /* Line-end checking is turned OFF, not on, by its switch */ pswit[LINE_END_SWITCH]^=1; /* Echoing is turned OFF, not on, by its switch */ pswit[ECHO_SWITCH]^=1; if (pswit[OVERVIEW_SWITCH]) /* just print summary; don't echo */ - pswit[ECHO_SWITCH]=0; + pswit[ECHO_SWITCH]=0; /* * Web uploads - for the moment, this is really just a placeholder * until we decide what processing we really want to do on web uploads @@ -430,78 +464,78 @@ if (pswit[WEB_SWITCH]) { /* specific override for web uploads */ - pswit[ECHO_SWITCH]=1; - pswit[SQUOTE_SWITCH]=0; - pswit[TYPO_SWITCH]=1; - pswit[QPARA_SWITCH]=0; - pswit[PARANOID_SWITCH]=1; - pswit[LINE_END_SWITCH]=0; - pswit[OVERVIEW_SWITCH]=0; - pswit[STDOUT_SWITCH]=0; - pswit[HEADER_SWITCH]=1; - pswit[VERBOSE_SWITCH]=0; - pswit[MARKUP_SWITCH]=0; - pswit[USERTYPO_SWITCH]=0; - pswit[DP_SWITCH]=0; + pswit[ECHO_SWITCH]=1; + pswit[SQUOTE_SWITCH]=0; + pswit[TYPO_SWITCH]=1; + pswit[QPARA_SWITCH]=0; + pswit[PARANOID_SWITCH]=1; + pswit[LINE_END_SWITCH]=0; + pswit[OVERVIEW_SWITCH]=0; + pswit[STDOUT_SWITCH]=0; + pswit[HEADER_SWITCH]=1; + pswit[VERBOSE_SWITCH]=0; + pswit[MARKUP_SWITCH]=0; + pswit[USERTYPO_SWITCH]=0; + pswit[DP_SWITCH]=0; } if (argcMAXARGS) { /* check number of args */ - proghelp(); - return 1; + proghelp(); + return 1; } /* read in the user-defined stealth scanno list */ if (pswit[USERTYPO_SWITCH]) { /* ... we were told we had one! */ - usertypofile=fopen(USERTYPO_FILE,"rb"); - if (!usertypofile) + usertypofile=fopen(USERTYPO_FILE,"rb"); + if (!usertypofile) { /* not in cwd. try excuteable directory. */ - strcpy(usertypo_file,running_from); - strcat(usertypo_file,USERTYPO_FILE); - usertypofile=fopen(usertypo_file,"rb"); - if (!usertypofile) { + strcpy(usertypo_file,running_from); + strcat(usertypo_file,USERTYPO_FILE); + usertypofile=fopen(usertypo_file,"rb"); + if (!usertypofile) { /* we ain't got no user typo file! */ - printf(" --> I couldn't find gutcheck.typ " + printf(" --> I couldn't find gutcheck.typ " "-- proceeding without user typos.\n"); } } - usertypo_count=0; - if (usertypofile) + usertypo_count=0; + if (usertypofile) { /* we managed to open a User Typo File! */ - if (pswit[USERTYPO_SWITCH]) + if (pswit[USERTYPO_SWITCH]) { - while (flgets(aline,LINEBUFSIZE-1,usertypofile, + while (flgets(aline,LINEBUFSIZE-1,usertypofile, (long)usertypo_count)) { - if (strlen(aline)>1) + if (strlen(aline)>1) { - if ((int)*aline>33) + if ((int)*aline>33) { - s=malloc(strlen(aline)+1); - if (!s) + s=malloc(strlen(aline)+1); + if (!s) { - fprintf(stderr,"bookloupe: cannot get enough " + fprintf(stderr,"bookloupe: cannot get enough " "memory for user typo file!\n"); - exit(1); + exit(1); } - strcpy(s,aline); - usertypo[usertypo_count]=s; - usertypo_count++; - if (usertypo_count>=MAX_USER_TYPOS) + strcpy(s,aline); + usertypo[usertypo_count]=s; + usertypo_count++; + if (usertypo_count>=MAX_USER_TYPOS) { - printf(" --> Only %d user-defined typos " + printf(" --> Only %d user-defined typos " "allowed: ignoring the rest\n", MAX_USER_TYPOS); - break; + break; } } } } } - fclose(usertypofile); + fclose(usertypofile); } } fprintf(stderr,"bookloupe: Check and report on an e-text\n"); @@ -513,47 +547,39 @@ { printf(" Checked %ld lines of %ld (head+foot = %ld)\n\n", checked_linecnt,linecnt,linecnt-checked_linecnt); - printf(" --------------- Queries found --------------\n"); - if (cnt_long) - printf(" Long lines: %14ld\n",cnt_long); - if (cnt_short) - printf(" Short lines: %14ld\n",cnt_short); - if (cnt_lineend) - printf(" Line-end problems: %14ld\n",cnt_lineend); - if (cnt_word) - printf(" Common typos: %14ld\n",cnt_word); - if (cnt_dquot) - printf(" Unmatched quotes: %14ld\n",cnt_dquot); - if (cnt_squot) - printf(" Unmatched SingleQuotes: %14ld\n",cnt_squot); - if (cnt_brack) - printf(" Unmatched brackets: %14ld\n",cnt_brack); - if (cnt_bin) - printf(" Non-ASCII characters: %14ld\n",cnt_bin); - if (cnt_odd) - printf(" Proofing characters: %14ld\n",cnt_odd); - if (cnt_punct) + printf(" --------------- Queries found --------------\n"); + if (cnt_long) + printf(" Long lines: %14ld\n",cnt_long); + if (cnt_short) + printf(" Short lines: %14ld\n",cnt_short); + if (cnt_lineend) + printf(" Line-end problems: %14ld\n",cnt_lineend); + if (cnt_word) + printf(" Common typos: %14ld\n",cnt_word); + if (cnt_dquot) + printf(" Unmatched quotes: %14ld\n",cnt_dquot); + if (cnt_squot) + printf(" Unmatched SingleQuotes: %14ld\n",cnt_squot); + if (cnt_brack) + printf(" Unmatched brackets: %14ld\n",cnt_brack); + if (cnt_bin) + printf(" Non-ASCII characters: %14ld\n",cnt_bin); + if (cnt_odd) + printf(" Proofing characters: %14ld\n",cnt_odd); + if (cnt_punct) printf(" Punctuation & spacing queries: %14ld\n",cnt_punct); - if (cnt_dash) - printf(" Non-standard dashes: %14ld\n",cnt_dash); - if (cnt_html) - printf(" Possible HTML tags: %14ld\n",cnt_html); - printf("\n"); - printf(" TOTAL QUERIES %14ld\n", - cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+ - cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend); + if (cnt_dash) + printf(" Non-standard dashes: %14ld\n",cnt_dash); + if (cnt_html) + printf(" Possible HTML tags: %14ld\n",cnt_html); + printf("\n"); + printf(" TOTAL QUERIES %14ld\n", + cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+ + cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend); } return 0; } -struct first_pass_results { - long firstline,astline; - long footerline,totlen,binlen,alphalen,endquote_count,shortline,dotcomma; - long fslashline,hyphens,longline,verylongline,htmcount,standalone_digit; - long spacedash,emdash,space_emdash,non_PG_space_emdash,PG_space_emdash; - signed int Dutchcount,Frenchcount; -}; - /* * first_pass: * @@ -566,140 +592,135 @@ { char laststart=CHAR_SPACE; const char *s; - signed int i,llen; + int i,llen; unsigned int lastlen=0,lastblen=0; long spline=0,nspline=0; static struct first_pass_results results={0}; char inword[MAXWORDLEN]=""; while (fgets(aline,LINEBUFSIZE-1,infile)) { - while (aline[strlen(aline)-1]==10 || aline[strlen(aline)-1]==13) + while (aline[strlen(aline)-1]==10 || aline[strlen(aline)-1]==13) aline[strlen(aline)-1]=0; - linecnt++; - if (strstr(aline,"*END") && strstr(aline,"SMALL PRINT") && + linecnt++; + if (strstr(aline,"*END") && strstr(aline,"SMALL PRINT") && (strstr(aline,"PUBLIC DOMAIN") || strstr(aline,"COPYRIGHT"))) { - if (spline) - printf(" --> Duplicate header?\n"); - spline=linecnt+1; /* first line of non-header text, that is */ + if (spline) + printf(" --> Duplicate header?\n"); + spline=linecnt+1; /* first line of non-header text, that is */ } - if (!strncmp(aline,"*** START",9) && strstr(aline,"PROJECT GUTENBERG")) + if (!strncmp(aline,"*** START",9) && strstr(aline,"PROJECT GUTENBERG")) { - if (nspline) - printf(" --> Duplicate header?\n"); - nspline=linecnt+1; /* first line of non-header text, that is */ + if (nspline) + printf(" --> Duplicate header?\n"); + nspline=linecnt+1; /* first line of non-header text, that is */ } - if (spline || nspline) + if (spline || nspline) { - lowerit(aline); - if (strstr(aline,"end") && strstr(aline,"project gutenberg")) + lowerit(aline); + if (strstr(aline,"end") && strstr(aline,"project gutenberg")) { - if (strstr(aline,"end") Duplicate footer?\n"); + if (!nspline) + printf(" --> Duplicate footer?\n"); } - else - results.footerline=linecnt; + else + results.footerline=linecnt; } } } - if (spline) + if (spline) results.firstline=spline; - if (nspline) + if (nspline) results.firstline=nspline; /* override with new */ - if (results.footerline) + if (results.footerline) continue; /* don't count the boilerplate in the footer */ - llen=strlen(aline); - results.totlen+=llen; - for (i=0;i127) + if ((unsigned char)aline[i]>127) results.binlen++; - if (gcisalpha(aline[i])) + if (gcisalpha(aline[i])) results.alphalen++; - if (i>0 && aline[i]==CHAR_DQUOTE && isalpha(aline[i-1])) + if (i>0 && aline[i]==CHAR_DQUOTE && isalpha(aline[i-1])) results.endquote_count++; } - if (strlen(aline)>2 && lastlen>2 && lastlen2 && lastlen>2 && lastlen2 && lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE) results.shortline++; - if (*aline && (unsigned char)aline[strlen(aline)-1]<=CHAR_SPACE) + if (*aline && (unsigned char)aline[strlen(aline)-1]<=CHAR_SPACE) cnt_spacend++; - if (strstr(aline,".,")) + if (strstr(aline,".,")) results.dotcomma++; - /* only count ast lines for ignoring purposes where there is */ - /* locase text on the line */ - if (strstr(aline,"*")) + /* only count ast lines for ignoring purposes where there is */ + /* locase text on the line */ + if (strstr(aline,"*")) { - for (s=aline;*s;s++) - if (*s>='a' && *s<='z') - break; - if (*s) + for (s=aline;*s;s++) + if (*s>='a' && *s<='z') + break; + if (*s) results.astline++; } - if (strstr(aline,"/")) - results.fslashline++; - for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--) + if (strstr(aline,"/")) + results.fslashline++; + for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--) ; - if (aline[i]=='-' && aline[i-1]!='-') + if (aline[i]=='-' && aline[i-1]!='-') results.hyphens++; - if (llen>LONGEST_PG_LINE) + if (llen>LONGEST_PG_LINE) results.longline++; - if (llen>WAY_TOO_LONG) + if (llen>WAY_TOO_LONG) results.verylongline++; - if (strstr(aline,"<") && strstr(aline,">")) + if (strstr(aline,"<") && strstr(aline,">")) { - i=(signed int)(strstr(aline,">")-strstr(aline,"<")+1); - if (i>0) - results.htmcount++; - if (strstr(aline,"")) + i=(int)(strstr(aline,">")-strstr(aline,"<")+1); + if (i>0) + results.htmcount++; + if (strstr(aline,"")) results.htmcount+=4; /* bonus marks! */ } - /* Check for spaced em-dashes */ - if (strstr(aline,"--")) + /* Check for spaced em-dashes */ + if (strstr(aline,"--")) { - results.emdash++; - if (*(strstr(aline,"--")-1)==CHAR_SPACE || - (*(strstr(aline,"--")+2)==CHAR_SPACE)) + results.emdash++; + if (*(strstr(aline,"--")-1)==CHAR_SPACE || + (*(strstr(aline,"--")+2)==CHAR_SPACE)) results.space_emdash++; - if (*(strstr(aline,"--")-1)==CHAR_SPACE && - (*(strstr(aline,"--")+2)==CHAR_SPACE)) + if (*(strstr(aline,"--")-1)==CHAR_SPACE && + (*(strstr(aline,"--")+2)==CHAR_SPACE)) /* count of em-dashes with spaces both sides */ results.non_PG_space_emdash++; - if (*(strstr(aline,"--")-1)!=CHAR_SPACE && - (*(strstr(aline,"--")+2)!=CHAR_SPACE)) + if (*(strstr(aline,"--")-1)!=CHAR_SPACE && + (*(strstr(aline,"--")+2)!=CHAR_SPACE)) /* count of PG-type em-dashes with no spaces */ results.PG_space_emdash++; } - for (s=aline;*s;) + for (s=aline;*s;) { - s=getaword(s,inword); - if (!strcmp(inword,"hij") || !strcmp(inword,"niet")) - results.Dutchcount++; - if (!strcmp(inword,"dans") || !strcmp(inword,"avec")) - results.Frenchcount++; - if (!strcmp(inword,"0") || !strcmp(inword,"1")) - results.standalone_digit++; + s=getaword(s,inword); + if (!strcmp(inword,"hij") || !strcmp(inword,"niet")) + results.Dutchcount++; + if (!strcmp(inword,"dans") || !strcmp(inword,"avec")) + results.Frenchcount++; + if (!strcmp(inword,"0") || !strcmp(inword,"1")) + results.standalone_digit++; } - /* Check for spaced dashes */ - if (strstr(aline," -") && *(strstr(aline," -")+2)!='-') + /* Check for spaced dashes */ + if (strstr(aline," -") && *(strstr(aline," -")+2)!='-') results.spacedash++; - lastblen=lastlen; - lastlen=strlen(aline); - laststart=aline[0]; + lastblen=lastlen; + lastlen=strlen(aline); + laststart=aline[0]; } return &results; } -struct warnings { - signed int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen; - signed int endquote,isDutch,isFrench; -}; - /* * report_first_pass: * @@ -709,13 +730,13 @@ { static struct warnings warnings={0}; if (cnt_spacend>0) - printf(" --> %ld lines in this file have white space at end\n", + printf(" --> %ld lines in this file have white space at end\n", cnt_spacend); warnings.dotcomma=1; if (results->dotcomma>5) { - warnings.dotcomma=0; - printf(" --> %ld lines in this file contain '.,'. " + warnings.dotcomma=0; + printf(" --> %ld lines in this file contain '.,'. " "Not reporting them.\n",results->dotcomma); } /* @@ -725,8 +746,8 @@ warnings.shortline=1; if (results->shortline>50 || results->shortline*10>linecnt) { - warnings.shortline=0; - printf(" --> %ld lines in this file are short. " + warnings.shortline=0; + printf(" --> %ld lines in this file are short. " "Not reporting short lines.\n",results->shortline); } /* @@ -736,16 +757,16 @@ warnings.longline=1; if (results->longline>50 || results->longline*10>linecnt) { - warnings.longline=0; - printf(" --> %ld lines in this file are long. " + warnings.longline=0; + printf(" --> %ld lines in this file are long. " "Not reporting long lines.\n",results->longline); } /* If more than 10 lines contain asterisks, don't bother reporting them. */ warnings.ast=1; if (results->astline>10) { - warnings.ast=0; - printf(" --> %ld lines in this file contain asterisks. " + warnings.ast=0; + printf(" --> %ld lines in this file contain asterisks. " "Not reporting them.\n",results->astline); } /* @@ -755,8 +776,8 @@ warnings.fslash=1; if (results->fslashline>10) { - warnings.fslash=0; - printf(" --> %ld lines in this file contain forward slashes. " + warnings.fslash=0; + printf(" --> %ld lines in this file contain forward slashes. " "Not reporting them.\n",results->fslashline); } /* @@ -766,8 +787,8 @@ warnings.endquote=1; if (results->endquote_count>20) { - warnings.endquote=0; - printf(" --> %ld lines in this file contain unpunctuated endquotes. " + warnings.endquote=0; + printf(" --> %ld lines in this file contain unpunctuated endquotes. " "Not reporting them.\n",results->endquote_count); } /* @@ -777,8 +798,8 @@ warnings.digit=1; if (results->standalone_digit>10) { - warnings.digit=0; - printf(" --> %ld lines in this file contain standalone 0s and 1s. " + warnings.digit=0; + printf(" --> %ld lines in this file contain standalone 0s and 1s. " "Not reporting them.\n",results->standalone_digit); } /* @@ -788,17 +809,17 @@ warnings.hyphen=1; if (results->hyphens>20) { - warnings.hyphen=0; - printf(" --> %ld lines in this file have hyphens at end. " + warnings.hyphen=0; + printf(" --> %ld lines in this file have hyphens at end. " "Not reporting them.\n",results->hyphens); } if (results->htmcount>20 && !pswit[MARKUP_SWITCH]) { - printf(" --> Looks like this is HTML. Switching HTML mode ON.\n"); - pswit[MARKUP_SWITCH]=1; + printf(" --> Looks like this is HTML. Switching HTML mode ON.\n"); + pswit[MARKUP_SWITCH]=1; } if (results->verylongline>0) - printf(" --> %ld lines in this file are VERY long!\n", + printf(" --> %ld lines in this file are VERY long!\n", results->verylongline); /* * If there are more non-PG spaced dashes than PG em-dashes, @@ -810,8 +831,8 @@ if (results->spacedash+results->non_PG_space_emdash> results->PG_space_emdash) { - warnings.dash=0; - printf(" --> There are %ld spaced dashes and em-dashes. " + warnings.dash=0; + printf(" --> There are %ld spaced dashes and em-dashes. " "Not reporting them.\n", results->spacedash+results->non_PG_space_emdash); } @@ -819,81 +840,75 @@ warnings.bin=1; if (results->binlen*4>results->totlen) { - printf(" --> This file does not appear to be ASCII. " + printf(" --> This file does not appear to be ASCII. " "Terminating. Best of luck with it!\n"); - exit(1); + exit(1); } if (results->alphalen*4totlen) { - printf(" --> This file does not appear to be text. " + printf(" --> This file does not appear to be text. " "Terminating. Best of luck with it!\n"); - exit(1); + exit(1); } if (results->binlen*100>results->totlen || results->binlen>100) { - printf(" --> There are a lot of foreign letters here. " + printf(" --> There are a lot of foreign letters here. " "Not reporting them.\n"); - warnings.bin=0; + warnings.bin=0; } warnings.isDutch=0; if (results->Dutchcount>50) { - warnings.isDutch=1; - printf(" --> This looks like Dutch - " + warnings.isDutch=1; + printf(" --> This looks like Dutch - " "switching off dashes and warnings for 's Middags case.\n"); } warnings.isFrench=0; if (results->Frenchcount>50) { - warnings.isFrench=1; - printf(" --> This looks like French - " + warnings.isFrench=1; + printf(" --> This looks like French - " "switching off some doublepunct.\n"); } if (results->firstline && results->footerline) - printf(" The PG header and footer appear to be already on.\n"); + printf(" The PG header and footer appear to be already on.\n"); else { - if (results->firstline) - printf(" The PG header is on - no footer.\n"); - if (results->footerline) - printf(" The PG footer is on - no header.\n"); + if (results->firstline) + printf(" The PG header is on - no footer.\n"); + if (results->footerline) + printf(" The PG footer is on - no header.\n"); } printf("\n"); if (pswit[VERBOSE_SWITCH]) { - warnings.bin=1; - warnings.shortline=1; - warnings.dotcomma=1; - warnings.longline=1; - warnings.dash=1; - warnings.digit=1; - warnings.ast=1; - warnings.fslash=1; - warnings.hyphen=1; - warnings.endquote=1; - printf(" *** Verbose output is ON -- you asked for it! ***\n"); + warnings.bin=1; + warnings.shortline=1; + warnings.dotcomma=1; + warnings.longline=1; + warnings.dash=1; + warnings.digit=1; + warnings.ast=1; + warnings.fslash=1; + warnings.hyphen=1; + warnings.endquote=1; + printf(" *** Verbose output is ON -- you asked for it! ***\n"); } if (warnings.isDutch) - warnings.dash=0; + warnings.dash=0; if (results->footerline>0 && results->firstline>0 && results->footerline>results->firstline && results->footerline-results->firstline<100) { - printf(" --> I don't really know where this text starts. \n"); - printf(" There are no reference points.\n"); - printf(" I'm going to have to report the header and footer " + printf(" --> I don't really know where this text starts. \n"); + printf(" There are no reference points.\n"); + printf(" I'm going to have to report the header and footer " "as well.\n"); - results->firstline=0; + results->firstline=0; } return &warnings; } -struct counters { - long quot; - signed int c_unders,c_brack,s_brack,r_brack; - signed int open_single_quote,close_single_quote; -}; - /* * analyse_quotes: * @@ -908,7 +923,7 @@ */ int analyse_quotes(const char *s,struct counters *counters) { - signed int guessquote=0; + int guessquote=0; int isemptyline=1; /* assume the line is empty until proven otherwise */ while (*s) { @@ -1028,7 +1043,7 @@ int isemptyline) { /* Don't repeat multiple warnings on one line. */ - signed int eNon_A=0,eTab=0,eTilde=0,eCarat=0,eFSlash=0,eAst=0; + int eNon_A=0,eTab=0,eTilde=0,eCarat=0,eFSlash=0,eAst=0; const char *s; unsigned char c; for (s=aline;*s;s++) @@ -1129,17 +1144,12 @@ printf("\n%s\n",aline); if (!pswit[OVERVIEW_SWITCH]) printf(" Line %ld column %d - Long line %d\n", - linecnt,strlen(aline),strlen(aline)); + linecnt,(int)strlen(aline),(int)strlen(aline)); else cnt_long++; } } -struct line_properties { - unsigned int len,blen; - char start; -}; - /* * check_for_short_line: * @@ -1173,7 +1183,7 @@ printf("\n%s\n",prevline); if (!pswit[OVERVIEW_SWITCH]) printf(" Line %ld column %d - Short line %d?\n", - linecnt-1,strlen(prevline),strlen(prevline)); + linecnt-1,(int)strlen(prevline),(int)strlen(prevline)); else cnt_short++; } @@ -1451,7 +1461,7 @@ void check_for_extra_period(const char *aline,const struct warnings *warnings) { const char *s,*t,*s1; - signed int i,istypo,isdup; + int i,istypo,isdup; static char qperiod[MAX_QWORD][MAX_QWORD_LENGTH]; static int qperiod_index=0; char testword[MAXWORDLEN]=""; @@ -1634,7 +1644,7 @@ istypo=0; strcpy(testword,inword); alower=0; - for (i=0;i<(signed int)strlen(testword);i++) + for (i=0;i<(int)strlen(testword);i++) { /* lowercase for testing */ if (testword[i]>='a' && testword[i]<='z') @@ -1809,10 +1819,6 @@ } } -struct parities { - int dquote,squote; -}; - /* * check_for_misspaced_punctuation: * @@ -2312,7 +2318,7 @@ close=strstr(aline,">"); if (close) { - i=(signed int)(close-open+1); + i=(int)(close-open+1); if (i>0) { strncpy(wrk,open,i); @@ -2350,7 +2356,7 @@ i=(int)(scolon-amp+1); for (s=amp;s0) { strncpy(wrk,amp,i); @@ -2367,11 +2373,6 @@ } } -struct pending { - char dquote[80],squote[80],rbrack[80],sbrack[80],cbrack[80],unders[80]; - long squot; -}; - /* * print_pending: * @@ -2551,7 +2552,7 @@ if (!pswit[OVERVIEW_SWITCH]) printf(" Line %ld column %d - " "No punctuation at para end?\n", - linecnt-1,strlen(prevline)); + linecnt-1,(int)strlen(prevline)); else cnt_punct++; break; @@ -2579,22 +2580,18 @@ struct parities parities={0}; struct pending pending={{0},}; int isemptyline; - long start_para_line; - signed int i,llen,isacro,isellipsis; - signed int isnewpara; - signed int enddash; + long start_para_line=0; + int i,isnewpara=0,enddash=0; last.start=CHAR_SPACE; *prevline=0; - linecnt=checked_linecnt=start_para_line=0; - i=llen=isacro=isellipsis=0; - isnewpara=enddash=0; + linecnt=checked_linecnt=0; infile=fopen(filename,"rb"); if (!infile) { - if (pswit[STDOUT_SWITCH]) - fprintf(stdout,"bookloupe: cannot open %s\n",filename); - else - fprintf(stderr,"bookloupe: cannot open %s\n",filename); + if (pswit[STDOUT_SWITCH]) + fprintf(stdout,"bookloupe: cannot open %s\n",filename); + else + fprintf(stderr,"bookloupe: cannot open %s\n",filename); exit(1); } fprintf(stdout,"\n\nFile: %s\n\n",filename); @@ -2607,84 +2604,84 @@ linecnt=0; while (flgets(aline,LINEBUFSIZE-1,infile,linecnt+1)) { - linecnt++; - if (linecnt==1) + linecnt++; + if (linecnt==1) isnewpara=1; - if (pswit[DP_SWITCH] && !strncmp(aline,"-----File: ",11)) + if (pswit[DP_SWITCH] && !strncmp(aline,"-----File: ",11)) continue; // skip DP page separators completely - if (linecntfirstline || + if (linecntfirstline || (first_pass_results->footerline>0 && linecnt>first_pass_results->footerline)) { - if (pswit[HEADER_SWITCH]) + if (pswit[HEADER_SWITCH]) { - if (!strncmp(aline,"Title:",6)) - printf(" %s\n",aline); - if (!strncmp(aline,"Author:",7)) - printf(" %s\n",aline); - if (!strncmp(aline,"Release Date:",13)) - printf(" %s\n",aline); - if (!strncmp(aline,"Edition:",8)) - printf(" %s\n\n",aline); + if (!strncmp(aline,"Title:",6)) + printf(" %s\n",aline); + if (!strncmp(aline,"Author:",7)) + printf(" %s\n",aline); + if (!strncmp(aline,"Release Date:",13)) + printf(" %s\n",aline); + if (!strncmp(aline,"Edition:",8)) + printf(" %s\n\n",aline); } - continue; /* skip through the header */ + continue; /* skip through the header */ } - checked_linecnt++; + checked_linecnt++; print_pending(aline,parastart,&pending); memset(&pending,0,sizeof(pending)); isemptyline=analyse_quotes(aline,&counters); - if (isnewpara && !isemptyline) + if (isnewpara && !isemptyline) { /* This line is the start of a new paragraph. */ - start_para_line=linecnt; + start_para_line=linecnt; /* Capture its first line in case we want to report it later. */ - strncpy(parastart,aline,80); - parastart[79]=0; + strncpy(parastart,aline,80); + parastart[79]=0; memset(&parities,0,sizeof(parities)); /* restart the quote count */ - s=aline; - while (!gcisalpha(*s) && !gcisdigit(*s) && *s) + s=aline; + while (!gcisalpha(*s) && !gcisdigit(*s) && *s) s++; - if (*s>='a' && *s<='z') + if (*s>='a' && *s<='z') { /* and its first letter is lowercase */ - if (pswit[ECHO_SWITCH]) + if (pswit[ECHO_SWITCH]) printf("\n%s\n",aline); - if (!pswit[OVERVIEW_SWITCH]) - printf(" Line %ld column %d - " + if (!pswit[OVERVIEW_SWITCH]) + printf(" Line %ld column %d - " "Paragraph starts with lower-case\n", linecnt,(int)(s-aline)+1); - else - cnt_punct++; + else + cnt_punct++; } - isnewpara=0; /* Signal the end of new para processing. */ + isnewpara=0; /* Signal the end of new para processing. */ } - /* Check for an em-dash broken at line end. */ - if (enddash && *aline=='-') + /* Check for an em-dash broken at line end. */ + if (enddash && *aline=='-') { - if (pswit[ECHO_SWITCH]) + if (pswit[ECHO_SWITCH]) printf("\n%s\n",aline); - if (!pswit[OVERVIEW_SWITCH]) - printf(" Line %ld column 1 - Broken em-dash?\n",linecnt); - else - cnt_punct++; + if (!pswit[OVERVIEW_SWITCH]) + printf(" Line %ld column 1 - Broken em-dash?\n",linecnt); + else + cnt_punct++; } - enddash=0; - for (s=aline+strlen(aline)-1;*s==' ' && s>aline;s--) + enddash=0; + for (s=aline+strlen(aline)-1;*s==' ' && s>aline;s--) ; - if (s>=aline && *s=='-') - enddash=1; + if (s>=aline && *s=='-') + enddash=1; check_for_control_characters(aline); - if (warnings->bin) + if (warnings->bin) check_for_odd_characters(aline,warnings,isemptyline); - if (warnings->longline) + if (warnings->longline) check_for_long_line(aline); - if (warnings->shortline) + if (warnings->shortline) check_for_short_line(aline,&last); - last.blen=last.len; - last.len=strlen(aline); - last.start=aline[0]; + last.blen=last.len; + last.len=strlen(aline); + last.start=aline[0]; check_for_starting_punctuation(aline); - if (warnings->dash) + if (warnings->dash) { check_for_spaced_emdash(aline); check_for_spaced_dash(aline); @@ -2703,25 +2700,25 @@ check_for_miscased_genative(aline); check_end_of_line(aline,warnings); check_for_unspaced_bracket(aline); - if (warnings->endquote) + if (warnings->endquote) check_for_unpunctuated_endquote(aline); check_for_html_tag(aline); check_for_html_entity(aline); - if (isemptyline) + if (isemptyline) { check_for_mismatched_quotes(&counters,&pending); memset(&counters,0,sizeof(counters)); /* let the next iteration know that it's starting a new para */ - isnewpara=1; + isnewpara=1; check_for_omitted_punctuation(prevline,&last,start_para_line); } - strcpy(prevline,aline); + strcpy(prevline,aline); } fclose(infile); if (!pswit[OVERVIEW_SWITCH]) - for (i=0;i='A' && *theline<='Z') - *theline+=32; + if (*theline>='A' && *theline<='Z') + *theline+=32; } /* @@ -3071,21 +3067,21 @@ char *s,*t; int i; if (!*theline) - return; + return; for (i=0;*DPmarkup[i];i++) { - s=strstr(theline,DPmarkup[i]); - while (s) + s=strstr(theline,DPmarkup[i]); + while (s) { - t=s+strlen(DPmarkup[i]); - while (*t) + t=s+strlen(DPmarkup[i]); + while (*t) { - *s=*t; - t++; + *s=*t; + t++; s++; } - *s=0; - s=strstr(theline,DPmarkup[i]); + *s=0; + s=strstr(theline,DPmarkup[i]); } } } @@ -3102,10 +3098,10 @@ void postprocess_for_HTML(char *theline) { if (strstr(theline,"<") && strstr(theline,">")) - while (losemarkup(theline)) - ; + while (losemarkup(theline)) + ; while (loseentities(theline)) - ; + ; } char *losemarkup(char *theline) @@ -3113,25 +3109,25 @@ char *s,*t; int i; if (!*theline) - return NULL; + return NULL; s=strstr(theline,"<"); t=strstr(theline,">"); if (!s || !t) return NULL; for (i=0;*markup[i];i++) - if (!tagcomp(s+1,markup[i])) + if (!tagcomp(s+1,markup[i])) { - if (!t[1]) + if (!t[1]) { - *s=0; - return s; + *s=0; + return s; } - else if (t>s) + else if (t>s) { strcpy(s,t+1); return s; } - } + } /* It's an unrecognized . */ return NULL; } @@ -3141,35 +3137,35 @@ int i; char *s,*t; if (!*theline) - return NULL; + return NULL; for (i=0;*entities[i].htmlent;i++) { - s=strstr(theline,entities[i].htmlent); - if (s) + s=strstr(theline,entities[i].htmlent); + if (s) { - t=malloc((size_t)strlen(s)); - if (!t) + t=malloc((size_t)strlen(s)); + if (!t) return NULL; - strcpy(t,s+strlen(entities[i].htmlent)); - strcpy(s,entities[i].textent); - strcat(s,t); - free(t); - return theline; + strcpy(t,s+strlen(entities[i].htmlent)); + strcpy(s,entities[i].textent); + strcat(s,t); + free(t); + return theline; } } for (i=0;*entities[i].htmlnum;i++) { - s=strstr(theline,entities[i].htmlnum); - if (s) + s=strstr(theline,entities[i].htmlnum); + if (s) { - t=malloc((size_t)strlen(s)); - if (!t) + t=malloc((size_t)strlen(s)); + if (!t) return NULL; - strcpy(t,s+strlen(entities[i].htmlnum)); - strcpy(s,entities[i].textent); - strcat(s,t); - free(t); - return theline; + strcpy(t,s+strlen(entities[i].htmlnum)); + strcpy(s,entities[i].textent); + strcat(s,t); + free(t); + return theline; } } return NULL; @@ -3184,9 +3180,9 @@ t++; /* ignore a slash */ while (*s && *t) { - if (tolower(*s)!=tolower(*t)) + if (tolower(*s)!=tolower(*t)) return 1; - s++; + s++; t++; } return 0;