1.1 --- a/bookloupe/bookloupe.c Mon Sep 23 21:18:27 2013 +0100
1.2 +++ b/bookloupe/bookloupe.c Wed Oct 02 23:51:18 2013 +0100
1.3 @@ -160,8 +160,7 @@
1.4 { NULL }
1.5 };
1.6
1.7 -long cnt_dquot; /* for overview mode, count of doublequote queries */
1.8 -long cnt_squot; /* for overview mode, count of singlequote queries */
1.9 +long cnt_quote; /* for overview mode, count of quote queries */
1.10 long cnt_brack; /* for overview mode, count of brackets queries */
1.11 long cnt_bin; /* for overview mode, count of non-ASCII queries */
1.12 long cnt_odd; /* for overview mode, count of odd character queries */
1.13 @@ -407,10 +406,8 @@
1.14 g_print(" Line-end problems: %14ld\n",cnt_lineend);
1.15 if (cnt_word)
1.16 g_print(" Common typos: %14ld\n",cnt_word);
1.17 - if (cnt_dquot)
1.18 - g_print(" Unmatched quotes: %14ld\n",cnt_dquot);
1.19 - if (cnt_squot)
1.20 - g_print(" Unmatched SingleQuotes: %14ld\n",cnt_squot);
1.21 + if (cnt_quote)
1.22 + g_print(" Unmatched quotes: %14ld\n",cnt_quote);
1.23 if (cnt_brack)
1.24 g_print(" Unmatched brackets: %14ld\n",cnt_brack);
1.25 if (cnt_bin)
1.26 @@ -425,8 +422,8 @@
1.27 g_print(" Possible HTML tags: %14ld\n",cnt_html);
1.28 g_print("\n");
1.29 g_print(" TOTAL QUERIES %14ld\n",
1.30 - cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+
1.31 - cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);
1.32 + cnt_quote+cnt_brack+cnt_bin+cnt_odd+cnt_long+cnt_short+cnt_punct+
1.33 + cnt_dash+cnt_word+cnt_html+cnt_lineend);
1.34 }
1.35 g_free(running_from);
1.36 if (usertypo)
1.37 @@ -453,6 +450,7 @@
1.38 long spline=0,nspline=0;
1.39 static struct first_pass_results results={0};
1.40 gchar *inword;
1.41 + QuoteClass qc;
1.42 lines=g_strsplit(etext,"\n",0);
1.43 for (j=0;lines[j];j++)
1.44 {
1.45 @@ -507,9 +505,16 @@
1.46 results.binlen++;
1.47 if (g_unichar_isalpha(g_utf8_get_char(s)))
1.48 results.alphalen++;
1.49 - if (s>lines[j] && g_utf8_get_char(s)==CHAR_DQUOTE &&
1.50 - isalpha(g_utf8_get_char(g_utf8_prev_char(s))))
1.51 - results.endquote_count++;
1.52 + if (s>lines[j])
1.53 + {
1.54 + if (CHAR_IS_DQUOTE(g_utf8_get_char(s)))
1.55 + qc=QUOTE_CLASS(g_utf8_get_char(s));
1.56 + else
1.57 + qc=INVALID_QUOTE;
1.58 + if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) &&
1.59 + g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(s))))
1.60 + results.endquote_count++; /* g_unichar_isalpha: <ctype.h> isalpha() is undefined for a gunichar above UCHAR_MAX */
1.61 + }
1.62 }
1.63 if (llen>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE && lastblen>2 &&
1.64 lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)
1.65 @@ -788,7 +793,7 @@
1.66 *
1.67 * Returns: TRUE if the line is empty.
1.68 */
1.69 -gboolean analyse_quotes(const char *aline,struct counters *counters)
1.70 +gboolean analyse_quotes(const char *aline,int linecnt,struct counters *counters)
1.71 {
1.72 int guessquote=0;
1.73 /* assume the line is empty until proven otherwise */
1.74 @@ -796,23 +801,24 @@
1.75 const char *s=aline,*sprev,*snext;
1.76 gunichar c;
1.77 sprev=NULL;
1.78 + GError *tmp_err=NULL;
1.79 while (*s)
1.80 {
1.81 snext=g_utf8_next_char(s);
1.82 c=g_utf8_get_char(s);
1.83 - if (c==CHAR_DQUOTE)
1.84 - counters->quot++;
1.85 - if (CHAR_IS_SQUOTE(c))
1.86 + if (CHAR_IS_DQUOTE(c))
1.87 + (void)count_quote(counters,c,QUOTE_CLASS(c),&tmp_err);
1.88 + else if (CHAR_IS_SQUOTE(c) && pswit[SQUOTE_SWITCH])
1.89 {
1.90 if (s==aline)
1.91 {
1.92 /*
1.93 - * At start of line, it can only be an openquote.
1.94 + * At start of line, it can only be a quotation mark.
1.95 * Hardcode a very common exception!
1.96 */
1.97 if (!g_str_has_prefix(snext,"tis") &&
1.98 !g_str_has_prefix(snext,"Tis"))
1.99 - increment_matching(counters,c,TRUE);
1.100 + (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err);
1.101 }
1.102 else if (g_unichar_isalpha(g_utf8_get_char(sprev)) &&
1.103 g_unichar_isalpha(g_utf8_get_char(snext)))
1.104 @@ -822,15 +828,20 @@
1.105 else if (c==CHAR_OPEN_SQUOTE || c==CHAR_LS_QUOTE ||
1.106 g_unichar_isalpha(g_utf8_get_char(snext)))
1.107 {
1.108 - /* it damwell better BE an openquote */
1.109 + /* certainly looks like a quotation mark */
1.110 if (!g_str_has_prefix(snext,"tis") &&
1.111 !g_str_has_prefix(snext,"Tis"))
1.112 /* hardcode a very common exception! */
1.113 - increment_matching(counters,c,TRUE);
1.114 + {
1.115 + if (g_utf8_strchr(".?!,;:",-1,g_utf8_get_char(sprev))) /* strchr() would narrow the code point to a char */
1.116 + (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err);
1.117 + else
1.118 + (void)count_quote(counters,c,OPENING_QUOTE,&tmp_err);
1.119 + }
1.120 }
1.121 else
1.122 {
1.123 - /* now - is it a closequote? */
1.124 + /* now - is it a quotation mark? */
1.125 guessquote=0; /* accumulate clues */
1.126 if (g_unichar_isalpha(g_utf8_get_char(sprev)))
1.127 {
1.128 @@ -844,25 +855,31 @@
1.129 /* bonus marks! */
1.130 guessquote-=2;
1.131 }
1.132 + if (innermost_quote_matches(counters,c))
1.133 + /*
1.134 + * Give it the benefit of some doubt,
1.135 + * if a squote is already open.
1.136 + */
1.137 + guessquote++;
1.138 + else
1.139 + guessquote--;
1.140 + if (guessquote>=0)
1.141 + (void)count_quote(counters,c,CLOSING_QUOTE,&tmp_err);
1.142 }
1.143 - /* it doesn't have a letter either side */
1.144 - else if (strchr(".?!,;:",g_utf8_get_char(sprev)) &&
1.145 - strchr(".?!,;: ",g_utf8_get_char(snext)))
1.146 - guessquote+=8; /* looks like a closequote */
1.147 else
1.148 - guessquote++;
1.149 - if (matching_difference(counters,CHAR_SQUOTE)>0)
1.150 - /*
1.151 - * Give it the benefit of some doubt,
1.152 - * if a squote is already open.
1.153 - */
1.154 - guessquote++;
1.155 - else
1.156 - guessquote--;
1.157 - if (guessquote>=0)
1.158 - increment_matching(counters,c,FALSE);
1.159 + /* no adjacent letter - it must be a quote of some kind */
1.160 + (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err);
1.161 }
1.162 }
1.163 + if (tmp_err)
1.164 + {
1.165 + if (pswit[ECHO_SWITCH])
1.166 + g_print("\n%s\n",aline);
1.167 + if (!pswit[OVERVIEW_SWITCH]) /* linecnt is an int: cast so it matches %ld */
1.168 + g_print(" Line %ld column %ld - %s\n",
1.169 + (long)linecnt,g_utf8_pointer_to_offset(aline,s)+1,tmp_err->message);
1.170 + g_clear_error(&tmp_err);
1.171 + }
1.172 if (c!=CHAR_SPACE && c!='-' && c!='.' && c!=CHAR_ASTERISK &&
1.173 c!='\r' && c!='\n')
1.174 isemptyline=FALSE; /* ignore lines like * * * as spacers */
1.175 @@ -1779,6 +1796,7 @@
1.176 gboolean isacro,isellipsis;
1.177 const char *s;
1.178 gunichar c,nc,pc,n2c;
1.179 + int parity;
1.180 c=g_utf8_get_char(aline);
1.181 nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
1.182 for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
1.183 @@ -1917,7 +1935,7 @@
1.184 c=nc;
1.185 nc=g_utf8_get_char(g_utf8_next_char(s));
1.186 /* for each character in the line after the first */
1.187 - if (c==CHAR_DQUOTE)
1.188 + if (CHAR_IS_DQUOTE(c))
1.189 {
1.190 if (!g_utf8_strchr(" _-.'`,;:!/([{?}])",-1,pc) &&
1.191 !g_utf8_strchr(" _-.'`,;:!/([{?}])",-1,nc) && nc ||
1.192 @@ -1939,10 +1957,18 @@
1.193 {
1.194 c=nc;
1.195 nc=g_utf8_get_char(g_utf8_next_char(s));
1.196 - if (c==CHAR_DQUOTE)
1.197 + if (CHAR_IS_DQUOTE(c))
1.198 {
1.199 - parities->dquote=!parities->dquote;
1.200 - if (!parities->dquote)
1.201 + if (c==CHAR_DQUOTE)
1.202 + {
1.203 + parities->dquote=!parities->dquote;
1.204 + parity=parities->dquote;
1.205 + }
1.206 + else if (c==CHAR_LD_QUOTE)
1.207 + parity=1;
1.208 + else
1.209 + parity=0;
1.210 + if (!parity)
1.211 {
1.212 /* parity even */
1.213 if (!g_utf8_strchr("_-.'`/,;:!?)]} ",-1,nc))
1.214 @@ -1975,7 +2001,8 @@
1.215 }
1.216 }
1.217 }
1.218 - if (g_utf8_get_char(aline)==CHAR_DQUOTE)
1.219 + c=g_utf8_get_char(aline);
1.220 + if (CHAR_IS_DQUOTE(c))
1.221 {
1.222 if (g_utf8_strchr(",;:!?)]} ",-1,
1.223 g_utf8_get_char(g_utf8_next_char(aline))))
1.224 @@ -2200,7 +2227,7 @@
1.225 s=g_utf8_prev_char(aline+lbytes);
1.226 c1=g_utf8_get_char(s);
1.227 c2=g_utf8_get_char(g_utf8_prev_char(s));
1.228 - if ((c1==CHAR_DQUOTE || CHAR_IS_SQUOTE(c1)) && c2==CHAR_SPACE)
1.229 + if ((CHAR_IS_DQUOTE(c1) || CHAR_IS_SQUOTE(c1)) && c2==CHAR_SPACE)
1.230 {
1.231 if (pswit[ECHO_SWITCH])
1.232 g_print("\n%s\n",aline);
1.233 @@ -2285,15 +2312,17 @@
1.234 {
1.235 const char *s;
1.236 gunichar c,nc,pc;
1.237 + QuoteClass qc;
1.238 c=g_utf8_get_char(aline);
1.239 nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
1.240 for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
1.241 {
1.242 pc=c;
1.243 c=nc;
1.244 + qc=CHAR_IS_DQUOTE(c)?QUOTE_CLASS(c):INVALID_QUOTE;
1.245 nc=g_utf8_get_char(g_utf8_next_char(s));
1.246 /* for each character in the line except 1st */
1.247 - if (c==CHAR_DQUOTE && isalpha(pc))
1.248 + if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) && g_unichar_isalpha(pc))
1.249 {
1.250 if (pswit[ECHO_SWITCH])
1.251 g_print("\n%s\n",aline);
1.252 @@ -2396,6 +2425,7 @@
1.253 gboolean letter_on_line=FALSE;
1.254 const char *s;
1.255 gunichar c;
1.256 + gboolean closing_quote;
1.257 for (s=prevline;*s;s=g_utf8_next_char(s))
1.258 if (g_unichar_isalpha(g_utf8_get_char(s)))
1.259 {
1.260 @@ -2417,7 +2447,11 @@
1.261 {
1.262 s=g_utf8_prev_char(s);
1.263 c=g_utf8_get_char(s);
1.264 - } while (CHAR_IS_CLOSING_QUOTE(c) && c>CHAR_SPACE && s>prevline);
1.265 + if (QUOTE_CLASS(c)==CLOSING_QUOTE || QUOTE_CLASS(c)==NEUTRAL_QUOTE)
1.266 + closing_quote=TRUE;
1.267 + else
1.268 + closing_quote=FALSE;
1.269 + } while (closing_quote && s>prevline);
1.270 for (;s>prevline;s=g_utf8_prev_char(s))
1.271 {
1.272 if (g_unichar_isalpha(g_utf8_get_char(s)))
1.273 @@ -2548,7 +2582,7 @@
1.274 }
1.275 checked_linecnt++;
1.276 print_pending(aline,parastart,&pending);
1.277 - isemptyline=analyse_quotes(aline,&counters);
1.278 + isemptyline=analyse_quotes(aline,linecnt,&counters);
1.279 if (isnewpara && !isemptyline)
1.280 {
1.281 /* This line is the start of a new paragraph. */