# HG changeset patch # User ali # Date 1369497696 -3600 # Node ID e4042a067753665ff0ffdb805078affbaea97c69 # Parent 20d51419e077af81580f07f79e2a24dd33f38213 Break analyse_quotes() out diff -r 20d51419e077 -r e4042a067753 bookloupe/bookloupe.c --- a/bookloupe/bookloupe.c Sat May 25 09:40:56 2013 +0100 +++ b/bookloupe/bookloupe.c Sat May 25 17:01:36 2013 +0100 @@ -888,6 +888,109 @@ return &warnings; } +struct counters { + long quot; + signed int c_unders,c_brack,s_brack,r_brack; + signed int open_single_quote,close_single_quote; +}; + +/* + * analyse_quotes: + * + * Look along the line, accumulate the count of quotes, and see + * if this is an empty line - i.e. a line with nothing on it + * but spaces. + * If line has just spaces, period, * and/or - on it, don't + * count it, since empty lines with asterisks or dashes to + * separate sections are common. + * + * Returns: Non-zero if the line is empty. + */ +int analyse_quotes(const char *s,struct counters *counters) +{ + signed int guessquote=0; + int isemptyline=1; /* assume the line is empty until proven otherwise */ + while (*s) + { + if (*s==CHAR_DQUOTE) + counters->quot++; + if (*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE) + { + if (s==aline) + { + /* + * At start of line, it can only be an openquote. + * Hardcode a very common exception! + */ + if (strncmp(s+2,"tis",3) && strncmp(s+2,"Tis",3)) + counters->open_single_quote++; + } + else if (gcisalpha(s[-1]) && gcisalpha(s[1])) + /* Do nothing! it's definitely an apostrophe, not a quote */ + ; + /* it's outside a word - let's check it out */ + else if (*s==CHAR_OPEN_SQUOTE || gcisalpha(s[1])) + { + /* it damwell better BE an openquote */ + if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3)) + /* hardcode a very common exception! */ + counters->open_single_quote++; + } + else + { + /* now - is it a closequote? */ + guessquote=0; /* accumulate clues */ + if (gcisalpha(s[-1])) + { + /* it follows a letter - could be either */ + guessquote++; + if (s[-1]=='s') + { + /* looks like a plural apostrophe */ + guessquote-=3; + if (s[1]==CHAR_SPACE) /* bonus marks! */ + guessquote-=2; + } + } + /* it doesn't have a letter either side */ + else if (strchr(".?!,;:",s[-1]) && strchr(".?!,;: ",s[1])) + guessquote+=8; /* looks like a closequote */ + else + guessquote++; + if (counters->open_single_quote>counters->close_single_quote) + /* + * Give it the benefit of some doubt, + * if a squote is already open. + */ + guessquote++; + else + guessquote--; + if (guessquote>=0) + counters->close_single_quote++; + } + } + if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK && + *s!=13 && *s!=10) + isemptyline=0; /* ignore lines like * * * as spacers */ + if (*s==CHAR_UNDERSCORE) + counters->c_unders++; + if (*s==CHAR_OPEN_CBRACK) + counters->c_brack++; + if (*s==CHAR_CLOSE_CBRACK) + counters->c_brack--; + if (*s==CHAR_OPEN_RBRACK) + counters->r_brack++; + if (*s==CHAR_CLOSE_RBRACK) + counters->r_brack--; + if (*s==CHAR_OPEN_SBRACK) + counters->s_brack++; + if (*s==CHAR_CLOSE_SBRACK) + counters->s_brack--; + s++; + } + return isemptyline; +} + /* * procfile: * @@ -901,13 +1004,13 @@ FILE *infile; struct first_pass_results *first_pass_results; struct warnings *warnings; - long quot,squot,start_para_line; - signed int i,j,llen,isemptyline,isacro,isellipsis,istypo,alower, + struct counters counters={0}; + int isemptyline; + long squot,start_para_line; + signed int i,j,llen,isacro,isellipsis,istypo,alower, eNon_A,eTab,eTilde,eAst,eFSlash,eCarat; unsigned int lastlen,lastblen; - signed int s_brack,c_brack,r_brack,c_unders; - signed int open_single_quote,close_single_quote,guessquote,dquotepar, - squotepar; + signed int dquotepar,squotepar; signed int isnewpara,vowel,consonant; char dquote_err[80],squote_err[80],rbrack_err[80],sbrack_err[80], cbrack_err[80],unders_err[80]; @@ -918,12 +1021,12 @@ *dquote_err=*squote_err=*rbrack_err=*cbrack_err=*sbrack_err= *unders_err=*prevline=0; linecnt=checked_linecnt=start_para_line=0; - quot=squot=s_brack=c_brack=r_brack=c_unders=0; - i=llen=isemptyline=isacro=isellipsis=istypo=0; + squot=0; + i=llen=isacro=isellipsis=istypo=0; isnewpara=vowel=consonant=enddash=0; qword_index=qperiod_index=isdup=0; *inword=*testword=0; - open_single_quote=close_single_quote=guessquote=dquotepar=squotepar=0; + dquotepar=squotepar=0; for (j=0;jclose_single_quote) - /* - * Give it the benefit of some doubt, - * if a squote is already open. - */ - guessquote++; - else - guessquote--; - if (guessquote>=0) - close_single_quote++; - } - } - if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK && - *s!=13 && *s!=10) - isemptyline=0; /* ignore lines like * * * as spacers */ - if (*s==CHAR_UNDERSCORE) - c_unders++; - if (*s==CHAR_OPEN_CBRACK) - c_brack++; - if (*s==CHAR_CLOSE_CBRACK) - c_brack--; - if (*s==CHAR_OPEN_RBRACK) - r_brack++; - if (*s==CHAR_CLOSE_RBRACK) - r_brack--; - if (*s==CHAR_OPEN_SBRACK) - s_brack++; - if (*s==CHAR_CLOSE_SBRACK) - s_brack--; - s++; - } + isemptyline=analyse_quotes(aline,&counters); if (isnewpara && !isemptyline) { /* This line is the start of a new paragraph. */ @@ -2393,35 +2409,34 @@ if (isemptyline) { /* end of para - add up the totals */ - if (quot%2) + if (counters.quot%2) sprintf(dquote_err," Line %ld - Mismatched quotes\n", linecnt); - if (pswit[SQUOTE_SWITCH] && open_single_quote && - open_single_quote!=close_single_quote) + if (pswit[SQUOTE_SWITCH] && counters.open_single_quote && + counters.open_single_quote!=counters.close_single_quote) sprintf(squote_err," Line %ld - Mismatched singlequotes?\n", linecnt); - if (pswit[SQUOTE_SWITCH] && open_single_quote && - open_single_quote!=close_single_quote && - open_single_quote!=close_single_quote+1) + if (pswit[SQUOTE_SWITCH] && counters.open_single_quote && + counters.open_single_quote!=counters.close_single_quote && + counters.open_single_quote!=counters.close_single_quote+1) /* * Flag it to be noted regardless of the * first char of the next para. */ squot=1; - if (r_brack) + if (counters.r_brack) sprintf(rbrack_err," Line %ld - " "Mismatched round brackets?\n",linecnt); - if (s_brack) + if (counters.s_brack) sprintf(sbrack_err," Line %ld - " "Mismatched square brackets?\n",linecnt); - if (c_brack) + if (counters.c_brack) sprintf(cbrack_err," Line %ld - " "Mismatched curly brackets?\n",linecnt); - if (c_unders%2) + if (counters.c_unders%2) sprintf(unders_err," Line %ld - Mismatched underscores?\n", linecnt); - quot=s_brack=c_brack=r_brack=c_unders=open_single_quote= - close_single_quote=0; + memset(&counters,0,sizeof(counters)); /* let the next iteration know that it's starting a new para */ isnewpara=1; }