Break analyse_quotes() out
author ali <ali@juiblex.co.uk>
Sat May 25 17:01:36 2013 +0100 (2013-05-25)
changeset 43 e4042a067753
parent 42 20d51419e077
child 44 66483ebc9b56
Break analyse_quotes() out
bookloupe/bookloupe.c
     1.1 --- a/bookloupe/bookloupe.c	Sat May 25 09:40:56 2013 +0100
     1.2 +++ b/bookloupe/bookloupe.c	Sat May 25 17:01:36 2013 +0100
     1.3 @@ -888,6 +888,109 @@
     1.4      return &warnings;
     1.5  }
     1.6  
     1.7 +struct counters {    /* per-paragraph tallies accumulated by analyse_quotes() */
     1.8 +    long quot;    /* number of CHAR_DQUOTE characters seen */
     1.9 +    signed int c_unders,c_brack,s_brack,r_brack;    /* underscores seen; curly/square/round brackets, opens minus closes */
    1.10 +    signed int open_single_quote,close_single_quote;    /* single quotes judged to open / to close a quotation */
    1.11 +};
    1.12 +
    1.13 +/*
    1.14 + * analyse_quotes:
    1.15 + *
    1.16 + * Scan the NUL-terminated line s, adding its double quotes, likely
    1.17 + * opening/closing single quotes, underscores and net bracket depths
    1.18 + * to *counters (which is only ever updated, never reset, here).
    1.19 + * A line holding only spaces, '.', '*', '-', CR and/or LF counts as
    1.20 + * empty, since rows of asterisks or dashes are common as section
    1.21 + * separators.
    1.22 + *
    1.23 + * Returns: Non-zero if the line is empty.
    1.24 + */
    1.25 +int analyse_quotes(const char *s,struct counters *counters)
    1.26 +{
    1.27 +    const char *start=s;  /* start of this line, so we need not rely on aline */
    1.28 +    int isemptyline=1;    /* assume the line is empty until proven otherwise */
    1.29 +    while (*s)
    1.30 +    {
    1.31 +	if (*s==CHAR_DQUOTE)
    1.32 +	    counters->quot++;
    1.33 +	if (*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE)
    1.34 +	{
    1.35 +	    if (s==start)
    1.36 +	    {
    1.37 +		/*
    1.38 +		 * At start of line, it can only be an openquote - unless it
    1.39 +		 * starts 'tis or 'Tis, a very common exception we hardcode.
    1.40 +		 */
    1.41 +		if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3))
    1.42 +		    counters->open_single_quote++;
    1.43 +	    }
    1.44 +	    else if (gcisalpha(s[-1]) && gcisalpha(s[1]))
    1.45 +		/* Do nothing! it's definitely an apostrophe, not a quote */
    1.46 +		;
    1.47 +	    /* it's outside a word - let's check it out */
    1.48 +	    else if (*s==CHAR_OPEN_SQUOTE || gcisalpha(s[1]))
    1.49 +	    {
    1.50 +		/* it had better be an openquote */
    1.51 +		if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3))
    1.52 +		    /* hardcode a very common exception! */
    1.53 +		    counters->open_single_quote++;
    1.54 +	    }
    1.55 +	    else
    1.56 +	    {
    1.57 +		/* now - is it a closequote? */
    1.58 +		signed int guessquote=0;   /* accumulate clues */
    1.59 +		if (gcisalpha(s[-1]))
    1.60 +		{
    1.61 +		    /* it follows a letter - could be either */
    1.62 +		    guessquote++;
    1.63 +		    if (s[-1]=='s')
    1.64 +		    {
    1.65 +			/* looks like a plural apostrophe */
    1.66 +			guessquote-=3;
    1.67 +			if (s[1]==CHAR_SPACE)  /* bonus marks! */
    1.68 +			    guessquote-=2;
    1.69 +		    }
    1.70 +		}
    1.71 +		/* it doesn't have a letter either side */
    1.72 +		else if (strchr(".?!,;:",s[-1]) && strchr(".?!,;: ",s[1]))
    1.73 +		    guessquote+=8; /* looks like a closequote */
    1.74 +		else
    1.75 +		    guessquote++;
    1.76 +		if (counters->open_single_quote>counters->close_single_quote)
    1.77 +		    /*
    1.78 +		     * Give it the benefit of some doubt,
    1.79 +		     * if a squote is already open.
    1.80 +		     */
    1.81 +		    guessquote++;
    1.82 +		else
    1.83 +		    guessquote--;
    1.84 +		if (guessquote>=0)
    1.85 +		    counters->close_single_quote++;
    1.86 +	    }
    1.87 +	}
    1.88 +	if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK &&
    1.89 +	  *s!=13 && *s!=10)
    1.90 +	    isemptyline=0;  /* ignore lines like  *  *  *  as spacers */
    1.91 +	if (*s==CHAR_UNDERSCORE)
    1.92 +	    counters->c_unders++;
    1.93 +	if (*s==CHAR_OPEN_CBRACK)
    1.94 +	    counters->c_brack++;
    1.95 +	if (*s==CHAR_CLOSE_CBRACK)
    1.96 +	    counters->c_brack--;
    1.97 +	if (*s==CHAR_OPEN_RBRACK)
    1.98 +	    counters->r_brack++;
    1.99 +	if (*s==CHAR_CLOSE_RBRACK)
   1.100 +	    counters->r_brack--;
   1.101 +	if (*s==CHAR_OPEN_SBRACK)
   1.102 +	    counters->s_brack++;
   1.103 +	if (*s==CHAR_CLOSE_SBRACK)
   1.104 +	    counters->s_brack--;
   1.105 +	s++;
   1.106 +    }
   1.107 +    return isemptyline;
   1.108 +}
   1.109 +
   1.110  /*
   1.111   * procfile:
   1.112   *
   1.113 @@ -901,13 +1004,13 @@
   1.114      FILE *infile;
   1.115      struct first_pass_results *first_pass_results;
   1.116      struct warnings *warnings;
   1.117 -    long quot,squot,start_para_line;
   1.118 -    signed int i,j,llen,isemptyline,isacro,isellipsis,istypo,alower,
   1.119 +    struct counters counters={0};
   1.120 +    int isemptyline;
   1.121 +    long squot,start_para_line;
   1.122 +    signed int i,j,llen,isacro,isellipsis,istypo,alower,
   1.123        eNon_A,eTab,eTilde,eAst,eFSlash,eCarat;
   1.124      unsigned int lastlen,lastblen;
   1.125 -    signed int s_brack,c_brack,r_brack,c_unders;
   1.126 -    signed int open_single_quote,close_single_quote,guessquote,dquotepar,
   1.127 -      squotepar;
   1.128 +    signed int dquotepar,squotepar;
   1.129      signed int isnewpara,vowel,consonant;
   1.130      char dquote_err[80],squote_err[80],rbrack_err[80],sbrack_err[80],
   1.131        cbrack_err[80],unders_err[80];
   1.132 @@ -918,12 +1021,12 @@
   1.133      *dquote_err=*squote_err=*rbrack_err=*cbrack_err=*sbrack_err=
   1.134        *unders_err=*prevline=0;
   1.135      linecnt=checked_linecnt=start_para_line=0;
   1.136 -    quot=squot=s_brack=c_brack=r_brack=c_unders=0;
   1.137 -    i=llen=isemptyline=isacro=isellipsis=istypo=0;
   1.138 +    squot=0;
   1.139 +    i=llen=isacro=isellipsis=istypo=0;
   1.140      isnewpara=vowel=consonant=enddash=0;
   1.141      qword_index=qperiod_index=isdup=0;
   1.142      *inword=*testword=0;
   1.143 -    open_single_quote=close_single_quote=guessquote=dquotepar=squotepar=0;
   1.144 +    dquotepar=squotepar=0;
   1.145      for (j=0;j<MAX_QWORD;j++)
   1.146      {
   1.147          dupcnt[j]=0;
   1.148 @@ -950,7 +1053,7 @@
   1.149       * Here we go with the main pass. Hold onto yer hat!
   1.150       * Re-init some variables we've dirtied.
   1.151       */
   1.152 -    quot=squot=linecnt=0;
   1.153 +    squot=linecnt=0;
   1.154      laststart=CHAR_SPACE;
   1.155      lastlen=lastblen=0;
   1.156      while (flgets(aline,LINEBUFSIZE-1,infile,linecnt+1))
   1.157 @@ -979,7 +1082,6 @@
   1.158  	}
   1.159          checked_linecnt++;
   1.160          s=aline;
   1.161 -        isemptyline=1;    /* assume the line is empty until proven otherwise */
   1.162          /*
   1.163  	 * If we are in a state of unbalanced quotes, and this line
   1.164           * doesn't begin with a quote, output the stored error message.
   1.165 @@ -1063,93 +1165,7 @@
   1.166  	}
   1.167          *dquote_err=*squote_err=*rbrack_err=*cbrack_err= 
   1.168  	  *sbrack_err=*unders_err=0;
   1.169 -	/*
   1.170 -         * Look along the line, accumulate the count of quotes, and see
   1.171 -         * if this is an empty line - i.e. a line with nothing on it
   1.172 -         * but spaces.
   1.173 -         * If line has just spaces, period, * and/or - on it, don't
   1.174 -         * count it, since empty lines with asterisks or dashes to
   1.175 -         * separate sections are common.
   1.176 -	 */
   1.177 -        s=aline;
   1.178 -        while (*s)
   1.179 -	{
   1.180 -            if (*s==CHAR_DQUOTE)
   1.181 -		quot++;
   1.182 -            if (*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE)
   1.183 -	    {
   1.184 -                if (s==aline)
   1.185 -		{
   1.186 -		    /*
   1.187 -		     * At start of line, it can only be an openquote.
   1.188 -		     * Hardcode a very common exception!
   1.189 -		     */
   1.190 -                    if (strncmp(s+2,"tis",3) && strncmp(s+2,"Tis",3))
   1.191 -                        open_single_quote++;
   1.192 -		}
   1.193 -                else if (gcisalpha(*(s-1)) && gcisalpha(*(s+1)))
   1.194 -		    /* Do nothing! it's definitely an apostrophe, not a quote */
   1.195 -		    ;
   1.196 -		/* it's outside a word - let's check it out */
   1.197 -		else if (*s==CHAR_OPEN_SQUOTE || gcisalpha(*(s+1)))
   1.198 -		{
   1.199 -		    /* it damwell better BE an openquote */
   1.200 -		    if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3))
   1.201 -			/* hardcode a very common exception! */
   1.202 -			open_single_quote++;
   1.203 -		}
   1.204 -		else
   1.205 -		{
   1.206 -		    /* now - is it a closequote? */
   1.207 -		    guessquote=0;   /* accumulate clues */
   1.208 -		    if (gcisalpha(s[-1]))
   1.209 -		    {
   1.210 -			/* it follows a letter - could be either */
   1.211 -			guessquote+=1;
   1.212 -			if (s[-1]=='s')
   1.213 -			{
   1.214 -			    /* looks like a plural apostrophe */
   1.215 -			    guessquote-=3;
   1.216 -			    if (s[1]==CHAR_SPACE)  /* bonus marks! */
   1.217 -				guessquote-=2;
   1.218 -			}
   1.219 -		    }
   1.220 -		    /* it doesn't have a letter either side */
   1.221 -		    else if (strchr(".?!,;:",s[-1]) && strchr(".?!,;: ",s[1]))
   1.222 -			guessquote+=8; /* looks like a closequote */
   1.223 -		    else
   1.224 -			guessquote++;
   1.225 -		    if (open_single_quote>close_single_quote)
   1.226 -			/*
   1.227 -			 * Give it the benefit of some doubt,
   1.228 -			 * if a squote is already open.
   1.229 -			 */
   1.230 -			guessquote++;
   1.231 -		    else
   1.232 -			guessquote--;
   1.233 -		    if (guessquote>=0)
   1.234 -			close_single_quote++;
   1.235 -		}
   1.236 -	    }
   1.237 -	    if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK &&
   1.238 -	      *s!=13 && *s!=10)
   1.239 -		isemptyline=0;  /* ignore lines like  *  *  *  as spacers */
   1.240 -	    if (*s==CHAR_UNDERSCORE)
   1.241 -		c_unders++;
   1.242 -	    if (*s==CHAR_OPEN_CBRACK)
   1.243 -		c_brack++;
   1.244 -	    if (*s==CHAR_CLOSE_CBRACK)
   1.245 -		c_brack--;
   1.246 -	    if (*s==CHAR_OPEN_RBRACK)
   1.247 -		r_brack++;
   1.248 -	    if (*s==CHAR_CLOSE_RBRACK)
   1.249 -		r_brack--;
   1.250 -	    if (*s==CHAR_OPEN_SBRACK)
   1.251 -		s_brack++;
   1.252 -	    if (*s==CHAR_CLOSE_SBRACK)
   1.253 -		s_brack--;
   1.254 -	    s++;
   1.255 -	}
   1.256 +	isemptyline=analyse_quotes(aline,&counters);
   1.257          if (isnewpara && !isemptyline)
   1.258  	{
   1.259  	    /* This line is the start of a new paragraph. */
   1.260 @@ -2393,35 +2409,34 @@
   1.261          if (isemptyline)
   1.262  	{
   1.263  	    /* end of para - add up the totals */
   1.264 -            if (quot%2)
   1.265 +            if (counters.quot%2)
   1.266                  sprintf(dquote_err,"    Line %ld - Mismatched quotes\n",
   1.267  		  linecnt);
   1.268 -            if (pswit[SQUOTE_SWITCH] && open_single_quote &&
   1.269 -	      open_single_quote!=close_single_quote)
   1.270 +            if (pswit[SQUOTE_SWITCH] && counters.open_single_quote &&
   1.271 +	      counters.open_single_quote!=counters.close_single_quote)
   1.272                  sprintf(squote_err,"    Line %ld - Mismatched singlequotes?\n",
   1.273  		  linecnt);
   1.274 -            if (pswit[SQUOTE_SWITCH] && open_single_quote &&
   1.275 -	      open_single_quote!=close_single_quote &&
   1.276 -	      open_single_quote!=close_single_quote+1)
   1.277 +            if (pswit[SQUOTE_SWITCH] && counters.open_single_quote &&
   1.278 +	      counters.open_single_quote!=counters.close_single_quote &&
   1.279 +	      counters.open_single_quote!=counters.close_single_quote+1)
   1.280  		/*
   1.281  		 * Flag it to be noted regardless of the
   1.282  		 * first char of the next para.
   1.283  		 */
   1.284                  squot=1;
   1.285 -            if (r_brack)
   1.286 +            if (counters.r_brack)
   1.287                  sprintf(rbrack_err,"    Line %ld - "
   1.288  		  "Mismatched round brackets?\n",linecnt);
   1.289 -            if (s_brack)
   1.290 +            if (counters.s_brack)
   1.291                  sprintf(sbrack_err,"    Line %ld - "
   1.292  		  "Mismatched square brackets?\n",linecnt);
   1.293 -            if (c_brack)
   1.294 +            if (counters.c_brack)
   1.295                  sprintf(cbrack_err,"    Line %ld - "
   1.296  		  "Mismatched curly brackets?\n",linecnt);
   1.297 -            if (c_unders%2)
   1.298 +            if (counters.c_unders%2)
   1.299                  sprintf(unders_err,"    Line %ld - Mismatched underscores?\n",
   1.300  		  linecnt);
   1.301 -            quot=s_brack=c_brack=r_brack=c_unders=open_single_quote=
   1.302 -	      close_single_quote=0;
   1.303 +	    memset(&counters,0,sizeof(counters));
   1.304  	    /* let the next iteration know that it's starting a new para */
   1.305              isnewpara=1;
   1.306  	}