1.1 --- a/bookloupe/bookloupe.c Sat May 25 19:14:21 2013 +0100
1.2 +++ b/bookloupe/bookloupe.c Sat May 25 19:27:51 2013 +0100
1.3 @@ -1089,31 +1089,93 @@
1.4 }
1.5
1.6 /*
1.7 + * check_for_long_line:
1.8 + *
1.9 + * Check for line too long.
1.10 + */
1.11 +void check_for_long_line(const char *aline)
1.12 +{
1.13 + if (strlen(aline)>LONGEST_PG_LINE)
1.14 + {
1.15 + if (pswit[ECHO_SWITCH])
1.16 + printf("\n%s\n",aline);
1.17 + if (!pswit[OVERVIEW_SWITCH])
1.18 + printf(" Line %ld column %d - Long line %d\n",
1.19 + linecnt,strlen(aline),strlen(aline));
1.20 + else
1.21 + cnt_long++;
1.22 + }
1.23 +}
1.24 +
/*
 * Properties of the most recently examined lines, as consumed by
 * check_for_short_line().
 */
struct line_properties {
    unsigned int len;   /* length of the last line examined */
    unsigned int blen;  /* length of the last line but one */
    char start;         /* first character of the last line examined */
};
1.29 +
1.30 +/*
1.31 + * check_for_short_line:
1.32 + *
1.33 + * Check for line too short.
1.34 + *
1.35 + * This one is a bit trickier to implement: we don't want to
1.36 + * flag the last line of a paragraph for being short, so we
1.37 + * have to wait until we know that our current line is a
1.38 + * "normal" line, then report the _previous_ line if it was too
1.39 + * short. We also don't want to report indented lines like
1.40 + * chapter heads or formatted quotations. We therefore keep
1.41 + * last->len as the length of the last line examined, and
1.42 + * last->blen as the length of the last but one, and try to
1.43 + * suppress unnecessary warnings by checking that both were of
1.44 + * "normal" length. We keep the first character of the last
1.45 + * line in last->start, and if it was a space, we assume that
1.46 + * the formatting is deliberate. I can't figure out a way to
1.47 + * distinguish something like a quoted verse left-aligned or
1.48 + * the header or footer of a letter from a paragraph of short
1.49 + * lines - maybe if I examined the whole paragraph, and if the
1.50 + * para has less than, say, 8 lines and if all lines are short,
1.51 + * then just assume it's OK? Need to look at some texts to see
1.52 + * how often a formula like this would get the right result.
1.53 + */
1.54 +void check_for_short_line(const char *aline,const struct line_properties *last)
1.55 +{
1.56 + if (strlen(aline)>1 && last->len>1 && last->len<SHORTEST_PG_LINE &&
1.57 + last->blen>1 && last->blen>SHORTEST_PG_LINE && last->start!=CHAR_SPACE)
1.58 + {
1.59 + if (pswit[ECHO_SWITCH])
1.60 + printf("\n%s\n",prevline);
1.61 + if (!pswit[OVERVIEW_SWITCH])
1.62 + printf(" Line %ld column %d - Short line %d?\n",
1.63 + linecnt-1,strlen(prevline),strlen(prevline));
1.64 + else
1.65 + cnt_short++;
1.66 + }
1.67 +}
1.68 +
1.69 +/*
1.70 * procfile:
1.71 *
1.72 * Process one file.
1.73 */
1.74 void procfile(char *filename)
1.75 {
1.76 - char *s,*t,*s1,laststart,*wordstart;
1.77 + char *s,*t,*s1,*wordstart;
1.78 char inword[MAXWORDLEN],testword[MAXWORDLEN];
1.79 char parastart[81]; /* first line of current para */
1.80 FILE *infile;
1.81 struct first_pass_results *first_pass_results;
1.82 struct warnings *warnings;
1.83 struct counters counters={0};
1.84 + struct line_properties last={0};
1.85 int isemptyline;
1.86 long squot,start_para_line;
1.87 signed int i,j,llen,isacro,isellipsis,istypo,alower;
1.88 - unsigned int lastlen,lastblen;
1.89 signed int dquotepar,squotepar;
1.90 signed int isnewpara,vowel,consonant;
1.91 char dquote_err[80],squote_err[80],rbrack_err[80],sbrack_err[80],
1.92 cbrack_err[80],unders_err[80];
1.93 signed int qword_index,qperiod_index,isdup;
1.94 signed int enddash;
1.95 - laststart=CHAR_SPACE;
1.96 - lastlen=lastblen=0;
1.97 + last.start=CHAR_SPACE;
1.98 *dquote_err=*squote_err=*rbrack_err=*cbrack_err=*sbrack_err=
1.99 *unders_err=*prevline=0;
1.100 linecnt=checked_linecnt=start_para_line=0;
1.101 @@ -1150,8 +1212,6 @@
1.102 * Re-init some variables we've dirtied.
1.103 */
1.104 squot=linecnt=0;
1.105 - laststart=CHAR_SPACE;
1.106 - lastlen=lastblen=0;
1.107 while (flgets(aline,LINEBUFSIZE-1,infile,linecnt+1))
1.108 {
1.109 linecnt++;
1.110 @@ -1324,56 +1384,13 @@
1.111 }
1.112 if (warnings->bin)
1.113 check_for_odd_characters(aline,warnings,isemptyline);
1.114 - /* Check for line too long. */
1.115 if (warnings->longline)
1.116 - {
1.117 - if (strlen(aline)>LONGEST_PG_LINE)
1.118 - {
1.119 - if (pswit[ECHO_SWITCH])
1.120 - printf("\n%s\n",aline);
1.121 - if (!pswit[OVERVIEW_SWITCH])
1.122 - printf(" Line %ld column %d - Long line %d\n",
1.123 - linecnt,strlen(aline),strlen(aline));
1.124 - else
1.125 - cnt_long++;
1.126 - }
1.127 - }
1.128 - /*
1.129 - * Check for line too short.
1.130 - * This one is a bit trickier to implement: we don't want to
1.131 - * flag the last line of a paragraph for being short, so we
1.132 - * have to wait until we know that our current line is a
1.133 - * "normal" line, then report the _previous_ line if it was too
1.134 - * short. We also don't want to report indented lines like
1.135 - * chapter heads or formatted quotations. We therefore keep
1.136 - * lastlen as the length of the last line examined, and
1.137 - * lastblen as the length of the last but one, and try to
1.138 - * suppress unnecessary warnings by checking that both were of
1.139 - * "normal" length. We keep the first character of the last
1.140 - * line in laststart, and if it was a space, we assume that the
1.141 - * formatting is deliberate. I can't figure out a way to
1.142 - * distinguish something like a quoted verse left-aligned or
1.143 - * the header or footer of a letter from a paragraph of short
1.144 - * lines - maybe if I examined the whole paragraph, and if the
1.145 - * para has less than, say, 8 lines and if all lines are short,
1.146 - * then just assume it's OK? Need to look at some texts to see
1.147 - * how often a formula like this would get the right result.
1.148 - */
1.149 - if (warnings->shortline && strlen(aline)>1 && lastlen>1 &&
1.150 - lastlen<SHORTEST_PG_LINE && lastblen>1 && lastblen>SHORTEST_PG_LINE &&
1.151 - laststart!=CHAR_SPACE)
1.152 - {
1.153 - if (pswit[ECHO_SWITCH])
1.154 - printf("\n%s\n",prevline);
1.155 - if (!pswit[OVERVIEW_SWITCH])
1.156 - printf(" Line %ld column %d - Short line %d?\n",
1.157 - linecnt-1,strlen(prevline),strlen(prevline));
1.158 - else
1.159 - cnt_short++;
1.160 - }
1.161 - lastblen=lastlen;
1.162 - lastlen=strlen(aline);
1.163 - laststart=aline[0];
1.164 + check_for_long_line(aline);
1.165 + if (warnings->shortline)
1.166 + check_for_short_line(aline,&last);
1.167 + last.blen=last.len;
1.168 + last.len=strlen(aline);
1.169 + last.start=aline[0];
1.170 /* Look for punctuation other than full ellipses at start of line. */
1.171 if (*aline && strchr(".?!,;:",aline[0]) && strncmp(". . .",aline,5))
1.172 {
1.173 @@ -2468,7 +2485,7 @@
1.174 * If we say "start_para_line < linecnt - 1" it doesn't, but then it
1.175 * misses genuine one-line paragraphs.
1.176 */
1.177 - if (i && lastblen>2 && start_para_line<linecnt-1 &&
1.178 + if (i && last.blen>2 && start_para_line<linecnt-1 &&
1.179 *prevline>CHAR_SPACE)
1.180 {
1.181 for (i=strlen(prevline)-1;