1.1 --- a/bookloupe/bookloupe.c Sat May 25 09:40:56 2013 +0100
1.2 +++ b/bookloupe/bookloupe.c Sat May 25 17:01:36 2013 +0100
1.3 @@ -888,6 +888,109 @@
1.4 return &warnings;
1.5 }
1.6
1.7 +struct counters { /* running tallies filled in by analyse_quotes; the caller zeroes them with memset at the end of each paragraph */
1.8 + long quot; /* number of CHAR_DQUOTE characters seen */
1.9 + signed int c_unders,c_brack,s_brack,r_brack; /* underscore count, then curly/square/round bracket balance (opens minus closes) */
1.10 + signed int open_single_quote,close_single_quote; /* single quotes judged to be opening / closing by analyse_quotes' heuristics */
1.11 +};
1.12 +
1.13 +/*
1.14 + * analyse_quotes:
1.15 + * @s: NUL-terminated line of text to scan.
1.16 + * @counters: running tallies, updated in place (never reset here).
1.17 + *
1.18 + * Look along the line and accumulate the counts of double quotes,
1.19 + * probable opening and closing single quotes, underscores and the
1.20 + * balance of curly, round and square brackets. Also see if this is
1.21 + * an empty line - i.e. a line with nothing on it but spaces.
1.22 + * If line has just spaces, period, * and/or - on it, treat it as
1.23 + * empty too, since lines with asterisks or dashes to separate
1.24 + * sections are common.
1.25 + *
1.26 + * Returns: Non-zero if the line is empty.
1.27 + */
1.28 +int analyse_quotes(const char *s,struct counters *counters)
1.29 +{
1.30 + const char *start=s; /* first character; s[-1] is only read beyond it */
1.31 + int guessquote=0,isemptyline=1; /* assume empty until proven otherwise */
1.32 + while (*s)
1.33 + {
1.34 + if (*s==CHAR_DQUOTE)
1.35 + counters->quot++;
1.36 + if (*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE)
1.37 + {
1.38 + if (s==start)
1.39 + {
1.40 + /*
1.41 + * At start of line, it can only be an openquote.
1.42 + * Hardcode a very common exception ('tis, 'Tis)!
1.43 + */
1.44 + if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3))
1.45 + counters->open_single_quote++;
1.46 + }
1.47 + else if (gcisalpha(s[-1]) && gcisalpha(s[1]))
1.48 + /* Do nothing! it's definitely an apostrophe, not a quote */
1.49 + ;
1.50 + /* it's outside a word - let's check it out */
1.51 + else if (*s==CHAR_OPEN_SQUOTE || gcisalpha(s[1]))
1.52 + {
1.53 + /* it damwell better BE an openquote */
1.54 + if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3))
1.55 + /* hardcode a very common exception! */
1.56 + counters->open_single_quote++;
1.57 + }
1.58 + else
1.59 + {
1.60 + /* now - is it a closequote? */
1.61 + guessquote=0; /* accumulate clues */
1.62 + if (gcisalpha(s[-1]))
1.63 + {
1.64 + /* it follows a letter - could be either */
1.65 + guessquote++;
1.66 + if (s[-1]=='s')
1.67 + {
1.68 + /* looks like a plural apostrophe */
1.69 + guessquote-=3;
1.70 + if (s[1]==CHAR_SPACE) /* bonus marks! */
1.71 + guessquote-=2;
1.72 + }
1.73 + }
1.74 + /* it doesn't have a letter either side */
1.75 + else if (strchr(".?!,;:",s[-1]) && strchr(".?!,;: ",s[1]))
1.76 + guessquote+=8; /* looks like a closequote */
1.77 + else
1.78 + guessquote++;
1.79 + if (counters->open_single_quote>counters->close_single_quote)
1.80 + /* Give it the benefit of some doubt, if a squote is already open. */
1.81 + guessquote++;
1.82 + else
1.83 + guessquote--;
1.84 + if (guessquote>=0)
1.85 + counters->close_single_quote++;
1.86 + }
1.87 + }
1.88 + if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK &&
1.89 + *s!=13 && *s!=10)
1.90 + isemptyline=0; /* ignore lines like * * * as spacers */
1.91 + if (*s==CHAR_UNDERSCORE)
1.92 + counters->c_unders++;
1.93 + if (*s==CHAR_OPEN_CBRACK)
1.94 + counters->c_brack++;
1.95 + if (*s==CHAR_CLOSE_CBRACK)
1.96 + counters->c_brack--;
1.97 + if (*s==CHAR_OPEN_RBRACK)
1.98 + counters->r_brack++;
1.99 + if (*s==CHAR_CLOSE_RBRACK)
1.100 + counters->r_brack--;
1.101 + if (*s==CHAR_OPEN_SBRACK)
1.102 + counters->s_brack++;
1.103 + if (*s==CHAR_CLOSE_SBRACK)
1.104 + counters->s_brack--;
1.105 + s++;
1.106 + }
1.107 + return isemptyline;
1.108 +}
1.109 +
1.110 /*
1.111 * procfile:
1.112 *
1.113 @@ -901,13 +1004,13 @@
1.114 FILE *infile;
1.115 struct first_pass_results *first_pass_results;
1.116 struct warnings *warnings;
1.117 - long quot,squot,start_para_line;
1.118 - signed int i,j,llen,isemptyline,isacro,isellipsis,istypo,alower,
1.119 + struct counters counters={0};
1.120 + int isemptyline;
1.121 + long squot,start_para_line;
1.122 + signed int i,j,llen,isacro,isellipsis,istypo,alower,
1.123 eNon_A,eTab,eTilde,eAst,eFSlash,eCarat;
1.124 unsigned int lastlen,lastblen;
1.125 - signed int s_brack,c_brack,r_brack,c_unders;
1.126 - signed int open_single_quote,close_single_quote,guessquote,dquotepar,
1.127 - squotepar;
1.128 + signed int dquotepar,squotepar;
1.129 signed int isnewpara,vowel,consonant;
1.130 char dquote_err[80],squote_err[80],rbrack_err[80],sbrack_err[80],
1.131 cbrack_err[80],unders_err[80];
1.132 @@ -918,12 +1021,12 @@
1.133 *dquote_err=*squote_err=*rbrack_err=*cbrack_err=*sbrack_err=
1.134 *unders_err=*prevline=0;
1.135 linecnt=checked_linecnt=start_para_line=0;
1.136 - quot=squot=s_brack=c_brack=r_brack=c_unders=0;
1.137 - i=llen=isemptyline=isacro=isellipsis=istypo=0;
1.138 + squot=0;
1.139 + i=llen=isacro=isellipsis=istypo=0;
1.140 isnewpara=vowel=consonant=enddash=0;
1.141 qword_index=qperiod_index=isdup=0;
1.142 *inword=*testword=0;
1.143 - open_single_quote=close_single_quote=guessquote=dquotepar=squotepar=0;
1.144 + dquotepar=squotepar=0;
1.145 for (j=0;j<MAX_QWORD;j++)
1.146 {
1.147 dupcnt[j]=0;
1.148 @@ -950,7 +1053,7 @@
1.149 * Here we go with the main pass. Hold onto yer hat!
1.150 * Re-init some variables we've dirtied.
1.151 */
1.152 - quot=squot=linecnt=0;
1.153 + squot=linecnt=0;
1.154 laststart=CHAR_SPACE;
1.155 lastlen=lastblen=0;
1.156 while (flgets(aline,LINEBUFSIZE-1,infile,linecnt+1))
1.157 @@ -979,7 +1082,6 @@
1.158 }
1.159 checked_linecnt++;
1.160 s=aline;
1.161 - isemptyline=1; /* assume the line is empty until proven otherwise */
1.162 /*
1.163 * If we are in a state of unbalanced quotes, and this line
1.164 * doesn't begin with a quote, output the stored error message.
1.165 @@ -1063,93 +1165,7 @@
1.166 }
1.167 *dquote_err=*squote_err=*rbrack_err=*cbrack_err=
1.168 *sbrack_err=*unders_err=0;
1.169 - /*
1.170 - * Look along the line, accumulate the count of quotes, and see
1.171 - * if this is an empty line - i.e. a line with nothing on it
1.172 - * but spaces.
1.173 - * If line has just spaces, period, * and/or - on it, don't
1.174 - * count it, since empty lines with asterisks or dashes to
1.175 - * separate sections are common.
1.176 - */
1.177 - s=aline;
1.178 - while (*s)
1.179 - {
1.180 - if (*s==CHAR_DQUOTE)
1.181 - quot++;
1.182 - if (*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE)
1.183 - {
1.184 - if (s==aline)
1.185 - {
1.186 - /*
1.187 - * At start of line, it can only be an openquote.
1.188 - * Hardcode a very common exception!
1.189 - */
1.190 - if (strncmp(s+2,"tis",3) && strncmp(s+2,"Tis",3))
1.191 - open_single_quote++;
1.192 - }
1.193 - else if (gcisalpha(*(s-1)) && gcisalpha(*(s+1)))
1.194 - /* Do nothing! it's definitely an apostrophe, not a quote */
1.195 - ;
1.196 - /* it's outside a word - let's check it out */
1.197 - else if (*s==CHAR_OPEN_SQUOTE || gcisalpha(*(s+1)))
1.198 - {
1.199 - /* it damwell better BE an openquote */
1.200 - if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3))
1.201 - /* hardcode a very common exception! */
1.202 - open_single_quote++;
1.203 - }
1.204 - else
1.205 - {
1.206 - /* now - is it a closequote? */
1.207 - guessquote=0; /* accumulate clues */
1.208 - if (gcisalpha(s[-1]))
1.209 - {
1.210 - /* it follows a letter - could be either */
1.211 - guessquote+=1;
1.212 - if (s[-1]=='s')
1.213 - {
1.214 - /* looks like a plural apostrophe */
1.215 - guessquote-=3;
1.216 - if (s[1]==CHAR_SPACE) /* bonus marks! */
1.217 - guessquote-=2;
1.218 - }
1.219 - }
1.220 - /* it doesn't have a letter either side */
1.221 - else if (strchr(".?!,;:",s[-1]) && strchr(".?!,;: ",s[1]))
1.222 - guessquote+=8; /* looks like a closequote */
1.223 - else
1.224 - guessquote++;
1.225 - if (open_single_quote>close_single_quote)
1.226 - /*
1.227 - * Give it the benefit of some doubt,
1.228 - * if a squote is already open.
1.229 - */
1.230 - guessquote++;
1.231 - else
1.232 - guessquote--;
1.233 - if (guessquote>=0)
1.234 - close_single_quote++;
1.235 - }
1.236 - }
1.237 - if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK &&
1.238 - *s!=13 && *s!=10)
1.239 - isemptyline=0; /* ignore lines like * * * as spacers */
1.240 - if (*s==CHAR_UNDERSCORE)
1.241 - c_unders++;
1.242 - if (*s==CHAR_OPEN_CBRACK)
1.243 - c_brack++;
1.244 - if (*s==CHAR_CLOSE_CBRACK)
1.245 - c_brack--;
1.246 - if (*s==CHAR_OPEN_RBRACK)
1.247 - r_brack++;
1.248 - if (*s==CHAR_CLOSE_RBRACK)
1.249 - r_brack--;
1.250 - if (*s==CHAR_OPEN_SBRACK)
1.251 - s_brack++;
1.252 - if (*s==CHAR_CLOSE_SBRACK)
1.253 - s_brack--;
1.254 - s++;
1.255 - }
1.256 + isemptyline=analyse_quotes(aline,&counters);
1.257 if (isnewpara && !isemptyline)
1.258 {
1.259 /* This line is the start of a new paragraph. */
1.260 @@ -2393,35 +2409,34 @@
1.261 if (isemptyline)
1.262 {
1.263 /* end of para - add up the totals */
1.264 - if (quot%2)
1.265 + if (counters.quot%2)
1.266 sprintf(dquote_err," Line %ld - Mismatched quotes\n",
1.267 linecnt);
1.268 - if (pswit[SQUOTE_SWITCH] && open_single_quote &&
1.269 - open_single_quote!=close_single_quote)
1.270 + if (pswit[SQUOTE_SWITCH] && counters.open_single_quote &&
1.271 + counters.open_single_quote!=counters.close_single_quote)
1.272 sprintf(squote_err," Line %ld - Mismatched singlequotes?\n",
1.273 linecnt);
1.274 - if (pswit[SQUOTE_SWITCH] && open_single_quote &&
1.275 - open_single_quote!=close_single_quote &&
1.276 - open_single_quote!=close_single_quote+1)
1.277 + if (pswit[SQUOTE_SWITCH] && counters.open_single_quote &&
1.278 + counters.open_single_quote!=counters.close_single_quote &&
1.279 + counters.open_single_quote!=counters.close_single_quote+1)
1.280 /*
1.281 * Flag it to be noted regardless of the
1.282 * first char of the next para.
1.283 */
1.284 squot=1;
1.285 - if (r_brack)
1.286 + if (counters.r_brack)
1.287 sprintf(rbrack_err," Line %ld - "
1.288 "Mismatched round brackets?\n",linecnt);
1.289 - if (s_brack)
1.290 + if (counters.s_brack)
1.291 sprintf(sbrack_err," Line %ld - "
1.292 "Mismatched square brackets?\n",linecnt);
1.293 - if (c_brack)
1.294 + if (counters.c_brack)
1.295 sprintf(cbrack_err," Line %ld - "
1.296 "Mismatched curly brackets?\n",linecnt);
1.297 - if (c_unders%2)
1.298 + if (counters.c_unders%2)
1.299 sprintf(unders_err," Line %ld - Mismatched underscores?\n",
1.300 linecnt);
1.301 - quot=s_brack=c_brack=r_brack=c_unders=open_single_quote=
1.302 - close_single_quote=0;
1.303 + memset(&counters,0,sizeof(counters));
1.304 /* let the next iteration know that it's starting a new para */
1.305 isnewpara=1;
1.306 }