Break check_for_long_line() and check_for_short_line() out
author ali <ali@juiblex.co.uk>
Sat May 25 19:27:51 2013 +0100 (2013-05-25)
changeset 45 d48f66b0ad0d
parent 44 66483ebc9b56
child 46 aa45307a6328
Break check_for_long_line() and check_for_short_line() out
bookloupe/bookloupe.c
     1.1 --- a/bookloupe/bookloupe.c	Sat May 25 19:14:21 2013 +0100
     1.2 +++ b/bookloupe/bookloupe.c	Sat May 25 19:27:51 2013 +0100
     1.3 @@ -1089,31 +1089,93 @@
     1.4  }
     1.5  
     1.6  /*
     1.7 + * check_for_long_line:
     1.8 + *
     1.9 + * Check for line too long.
    1.10 + */
    1.11 +void check_for_long_line(const char *aline)
    1.12 +{
    1.13 +    if (strlen(aline)>LONGEST_PG_LINE)
    1.14 +    {
    1.15 +	if (pswit[ECHO_SWITCH])
    1.16 +	    printf("\n%s\n",aline);
    1.17 +	if (!pswit[OVERVIEW_SWITCH])
    1.18 +	    printf("    Line %ld column %d - Long line %d\n",
    1.19 +	      linecnt,strlen(aline),strlen(aline));
    1.20 +	else
    1.21 +	    cnt_long++;
    1.22 +    }
    1.23 +}
    1.24 +
    1.25 +struct line_properties {
    1.26 +    unsigned int len,blen;
    1.27 +    char start;
    1.28 +};
    1.29 +
    1.30 +/*
    1.31 + * check_for_short_line:
    1.32 + *
    1.33 + * Check for line too short.
    1.34 + *
    1.35 + * This one is a bit trickier to implement: we don't want to
    1.36 + * flag the last line of a paragraph for being short, so we
    1.37 + * have to wait until we know that our current line is a
    1.38 + * "normal" line, then report the _previous_ line if it was too
    1.39 + * short. We also don't want to report indented lines like
    1.40 + * chapter heads or formatted quotations. We therefore keep
    1.41 + * last->len as the length of the last line examined, and
    1.42 + * last->blen as the length of the last but one, and try to
    1.43 + * suppress unnecessary warnings by checking that both were of
    1.44 + * "normal" length. We keep the first character of the last
    1.45 + * line in last->start, and if it was a space, we assume that
    1.46 + * the formatting is deliberate. I can't figure out a way to
    1.47 + * distinguish something like a quoted verse left-aligned or
    1.48 + * the header or footer of a letter from a paragraph of short
    1.49 + * lines - maybe if I examined the whole paragraph, and if the
    1.50 + * para has less than, say, 8 lines and if all lines are short,
    1.51 + * then just assume it's OK? Need to look at some texts to see
    1.52 + * how often a formula like this would get the right result.
    1.53 + */
    1.54 +void check_for_short_line(const char *aline,const struct line_properties *last)
    1.55 +{
    1.56 +    if (strlen(aline)>1 && last->len>1 && last->len<SHORTEST_PG_LINE &&
    1.57 +      last->blen>1 && last->blen>SHORTEST_PG_LINE && last->start!=CHAR_SPACE)
    1.58 +    {
    1.59 +	if (pswit[ECHO_SWITCH])
    1.60 +	    printf("\n%s\n",prevline);
    1.61 +	if (!pswit[OVERVIEW_SWITCH])
    1.62 +	    printf("    Line %ld column %d - Short line %d?\n",
    1.63 +	      linecnt-1,strlen(prevline),strlen(prevline));
    1.64 +	else
    1.65 +	    cnt_short++;
    1.66 +    }
    1.67 +}
    1.68 +
    1.69 +/*
    1.70   * procfile:
    1.71   *
    1.72   * Process one file.
    1.73   */
    1.74  void procfile(char *filename)
    1.75  {
    1.76 -    char *s,*t,*s1,laststart,*wordstart;
    1.77 +    char *s,*t,*s1,*wordstart;
    1.78      char inword[MAXWORDLEN],testword[MAXWORDLEN];
    1.79      char parastart[81];     /* first line of current para */
    1.80      FILE *infile;
    1.81      struct first_pass_results *first_pass_results;
    1.82      struct warnings *warnings;
    1.83      struct counters counters={0};
    1.84 +    struct line_properties last={0};
    1.85      int isemptyline;
    1.86      long squot,start_para_line;
    1.87      signed int i,j,llen,isacro,isellipsis,istypo,alower;
    1.88 -    unsigned int lastlen,lastblen;
    1.89      signed int dquotepar,squotepar;
    1.90      signed int isnewpara,vowel,consonant;
    1.91      char dquote_err[80],squote_err[80],rbrack_err[80],sbrack_err[80],
    1.92        cbrack_err[80],unders_err[80];
    1.93      signed int qword_index,qperiod_index,isdup;
    1.94      signed int enddash;
    1.95 -    laststart=CHAR_SPACE;
    1.96 -    lastlen=lastblen=0;
    1.97 +    last.start=CHAR_SPACE;
    1.98      *dquote_err=*squote_err=*rbrack_err=*cbrack_err=*sbrack_err=
    1.99        *unders_err=*prevline=0;
   1.100      linecnt=checked_linecnt=start_para_line=0;
   1.101 @@ -1150,8 +1212,6 @@
   1.102       * Re-init some variables we've dirtied.
   1.103       */
   1.104      squot=linecnt=0;
   1.105 -    laststart=CHAR_SPACE;
   1.106 -    lastlen=lastblen=0;
   1.107      while (flgets(aline,LINEBUFSIZE-1,infile,linecnt+1))
   1.108      {
   1.109          linecnt++;
   1.110 @@ -1324,56 +1384,13 @@
   1.111  	}
   1.112          if (warnings->bin)
   1.113  	    check_for_odd_characters(aline,warnings,isemptyline);
   1.114 -        /* Check for line too long. */
   1.115          if (warnings->longline)
   1.116 -	{
   1.117 -            if (strlen(aline)>LONGEST_PG_LINE)
   1.118 -	    {
   1.119 -                if (pswit[ECHO_SWITCH])
   1.120 -		    printf("\n%s\n",aline);
   1.121 -                if (!pswit[OVERVIEW_SWITCH])
   1.122 -                    printf("    Line %ld column %d - Long line %d\n",
   1.123 -		      linecnt,strlen(aline),strlen(aline));
   1.124 -                else
   1.125 -                    cnt_long++;
   1.126 -	    }
   1.127 -	}
   1.128 -        /*
   1.129 -	 * Check for line too short.
   1.130 -         * This one is a bit trickier to implement: we don't want to
   1.131 -         * flag the last line of a paragraph for being short, so we
   1.132 -         * have to wait until we know that our current line is a
   1.133 -         * "normal" line, then report the _previous_ line if it was too
   1.134 -         * short. We also don't want to report indented lines like
   1.135 -         * chapter heads or formatted quotations. We therefore keep
   1.136 -         * lastlen as the length of the last line examined, and
   1.137 -         * lastblen as the length of the last but one, and try to
   1.138 -         * suppress unnecessary warnings by checking that both were of
   1.139 -         * "normal" length. We keep the first character of the last
   1.140 -         * line in laststart, and if it was a space, we assume that the
   1.141 -         * formatting is deliberate. I can't figure out a way to
   1.142 -         * distinguish something like a quoted verse left-aligned or
   1.143 -         * the header or footer of a letter from a paragraph of short
   1.144 -         * lines - maybe if I examined the whole paragraph, and if the
   1.145 -         * para has less than, say, 8 lines and if all lines are short,
   1.146 -         * then just assume it's OK? Need to look at some texts to see
   1.147 -         * how often a formula like this would get the right result.
   1.148 -	 */
   1.149 -        if (warnings->shortline && strlen(aline)>1 && lastlen>1 &&
   1.150 -	  lastlen<SHORTEST_PG_LINE && lastblen>1 && lastblen>SHORTEST_PG_LINE &&
   1.151 -	  laststart!=CHAR_SPACE)
   1.152 -	{
   1.153 -	    if (pswit[ECHO_SWITCH])
   1.154 -		printf("\n%s\n",prevline);
   1.155 -	    if (!pswit[OVERVIEW_SWITCH])
   1.156 -		printf("    Line %ld column %d - Short line %d?\n",
   1.157 -		  linecnt-1,strlen(prevline),strlen(prevline));
   1.158 -	    else
   1.159 -		cnt_short++;
   1.160 -	}
   1.161 -        lastblen=lastlen;
   1.162 -        lastlen=strlen(aline);
   1.163 -        laststart=aline[0];
   1.164 +	    check_for_long_line(aline);
   1.165 +        if (warnings->shortline)
   1.166 +	    check_for_short_line(aline,&last);
   1.167 +        last.blen=last.len;
   1.168 +        last.len=strlen(aline);
   1.169 +        last.start=aline[0];
   1.170          /* Look for punctuation other than full ellipses at start of line. */
   1.171          if (*aline && strchr(".?!,;:",aline[0]) && strncmp(". . .",aline,5))
   1.172  	{
   1.173 @@ -2468,7 +2485,7 @@
   1.174               * If we say "start_para_line < linecnt - 1" it doesn't, but then it
   1.175               * misses genuine one-line paragraphs.
   1.176  	     */
   1.177 -            if (i && lastblen>2 && start_para_line<linecnt-1 &&
   1.178 +            if (i && last.blen>2 && start_para_line<linecnt-1 &&
   1.179  	      *prevline>CHAR_SPACE)
   1.180  	    {
   1.181                  for (i=strlen(prevline)-1;