diff -r adc06e9e8470 -r 8c2d6a0cf717 bookloupe/bookloupe.c --- a/bookloupe/bookloupe.c Mon Sep 23 21:18:27 2013 +0100 +++ b/bookloupe/bookloupe.c Tue Sep 24 07:18:50 2013 +0100 @@ -160,8 +160,7 @@ { NULL } }; -long cnt_dquot; /* for overview mode, count of doublequote queries */ -long cnt_squot; /* for overview mode, count of singlequote queries */ +long cnt_quote; /* for overview mode, count of quote queries */ long cnt_brack; /* for overview mode, count of brackets queries */ long cnt_bin; /* for overview mode, count of non-ASCII queries */ long cnt_odd; /* for overview mode, count of odd character queries */ @@ -407,10 +406,8 @@ g_print(" Line-end problems: %14ld\n",cnt_lineend); if (cnt_word) g_print(" Common typos: %14ld\n",cnt_word); - if (cnt_dquot) - g_print(" Unmatched quotes: %14ld\n",cnt_dquot); - if (cnt_squot) - g_print(" Unmatched SingleQuotes: %14ld\n",cnt_squot); + if (cnt_quote) + g_print(" Unmatched quotes: %14ld\n",cnt_quote); if (cnt_brack) g_print(" Unmatched brackets: %14ld\n",cnt_brack); if (cnt_bin) @@ -425,8 +422,8 @@ g_print(" Possible HTML tags: %14ld\n",cnt_html); g_print("\n"); g_print(" TOTAL QUERIES %14ld\n", - cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+ - cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend); + cnt_quote+cnt_brack+cnt_bin+cnt_odd+cnt_long+cnt_short+cnt_punct+ + cnt_dash+cnt_word+cnt_html+cnt_lineend); } g_free(running_from); if (usertypo) @@ -453,6 +450,7 @@ long spline=0,nspline=0; static struct first_pass_results results={0}; gchar *inword; + QuoteClass qc; lines=g_strsplit(etext,"\n",0); for (j=0;lines[j];j++) { @@ -507,9 +505,16 @@ results.binlen++; if (g_unichar_isalpha(g_utf8_get_char(s))) results.alphalen++; - if (s>lines[j] && g_utf8_get_char(s)==CHAR_DQUOTE && - isalpha(g_utf8_get_char(g_utf8_prev_char(s)))) - results.endquote_count++; + if (s>lines[j]) + { + if (CHAR_IS_DQUOTE(g_utf8_get_char(s))) + qc=QUOTE_CLASS(g_utf8_get_char(s)); + else + qc=INVALID_QUOTE; + if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) && + isalpha(g_utf8_get_char(g_utf8_prev_char(s)))) + results.endquote_count++; + } } if (llen>2 && lastlen>2 && lastlen2 && lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE) @@ -788,7 +793,7 @@ * * Returns: TRUE if the line is empty. */ -gboolean analyse_quotes(const char *aline,struct counters *counters) +gboolean analyse_quotes(const char *aline,int linecnt,struct counters *counters) { int guessquote=0; /* assume the line is empty until proven otherwise */ @@ -796,23 +801,24 @@ const char *s=aline,*sprev,*snext; gunichar c; sprev=NULL; + GError *tmp_err=NULL; while (*s) { snext=g_utf8_next_char(s); c=g_utf8_get_char(s); - if (c==CHAR_DQUOTE) - counters->quot++; - if (CHAR_IS_SQUOTE(c)) + if (CHAR_IS_DQUOTE(c)) + (void)count_quote(counters,c,QUOTE_CLASS(c),&tmp_err); + else if (CHAR_IS_SQUOTE(c) && pswit[SQUOTE_SWITCH]) { if (s==aline) { /* - * At start of line, it can only be an openquote. + * At start of line, it can only be a quotation mark. * Hardcode a very common exception! */ if (!g_str_has_prefix(snext,"tis") && !g_str_has_prefix(snext,"Tis")) - increment_matching(counters,c,TRUE); + (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err); } else if (g_unichar_isalpha(g_utf8_get_char(sprev)) && g_unichar_isalpha(g_utf8_get_char(snext))) @@ -822,15 +828,20 @@ else if (c==CHAR_OPEN_SQUOTE || c==CHAR_LS_QUOTE || g_unichar_isalpha(g_utf8_get_char(snext))) { - /* it damwell better BE an openquote */ + /* certainly looks like a quotation mark */ if (!g_str_has_prefix(snext,"tis") && !g_str_has_prefix(snext,"Tis")) /* hardcode a very common exception! */ - increment_matching(counters,c,TRUE); + { + if (strchr(".?!,;:",g_utf8_get_char(sprev))) + (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err); + else + (void)count_quote(counters,c,OPENING_QUOTE,&tmp_err); + } } else { - /* now - is it a closequote? */ + /* now - is it a quotation mark? */ guessquote=0; /* accumulate clues */ if (g_unichar_isalpha(g_utf8_get_char(sprev))) { @@ -844,25 +855,31 @@ /* bonus marks! */ guessquote-=2; } + if (innermost_quote_matches(counters,c)) + /* + * Give it the benefit of some doubt, + * if a squote is already open. + */ + guessquote++; + else + guessquote--; + if (guessquote>=0) + (void)count_quote(counters,c,CLOSING_QUOTE,&tmp_err); } - /* it doesn't have a letter either side */ - else if (strchr(".?!,;:",g_utf8_get_char(sprev)) && - strchr(".?!,;: ",g_utf8_get_char(snext))) - guessquote+=8; /* looks like a closequote */ else - guessquote++; - if (matching_difference(counters,CHAR_SQUOTE)>0) - /* - * Give it the benefit of some doubt, - * if a squote is already open. - */ - guessquote++; - else - guessquote--; - if (guessquote>=0) - increment_matching(counters,c,FALSE); + /* no adjacent letter - it must be a quote of some kind */ + (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err); } } + if (tmp_err) + { + if (pswit[ECHO_SWITCH]) + g_print("\n%s\n",aline); + if (!pswit[OVERVIEW_SWITCH]) + g_print(" Line %ld column %ld - %s\n", + linecnt,g_utf8_pointer_to_offset(aline,s)+1,tmp_err->message); + g_clear_error(&tmp_err); + } if (c!=CHAR_SPACE && c!='-' && c!='.' && c!=CHAR_ASTERISK && c!='\r' && c!='\n') isemptyline=FALSE; /* ignore lines like * * * as spacers */ @@ -1779,6 +1796,7 @@ gboolean isacro,isellipsis; const char *s; gunichar c,nc,pc,n2c; + int parity; c=g_utf8_get_char(aline); nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0; for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s)) @@ -1917,7 +1935,7 @@ c=nc; nc=g_utf8_get_char(g_utf8_next_char(s)); /* for each character in the line after the first */ - if (c==CHAR_DQUOTE) + if (CHAR_IS_DQUOTE(c)) { if (!g_utf8_strchr(" _-.'`,;:!/([{?}])",-1,pc) && !g_utf8_strchr(" _-.'`,;:!/([{?}])",-1,nc) && nc || @@ -1939,10 +1957,18 @@ { c=nc; nc=g_utf8_get_char(g_utf8_next_char(s)); - if (c==CHAR_DQUOTE) + if (CHAR_IS_DQUOTE(c)) { - parities->dquote=!parities->dquote; - if (!parities->dquote) + if (c==CHAR_DQUOTE) + { + parities->dquote=!parities->dquote; + parity=parities->dquote; + } + else if (c==CHAR_LD_QUOTE) + parity=1; + else + parity=0; + if (!parity) { /* parity even */ if (!g_utf8_strchr("_-.'`/,;:!?)]} ",-1,nc)) @@ -1975,7 +2001,8 @@ } } } - if (g_utf8_get_char(aline)==CHAR_DQUOTE) + c=g_utf8_get_char(aline); + if (CHAR_IS_DQUOTE(c)) { if (g_utf8_strchr(",;:!?)]} ",-1, g_utf8_get_char(g_utf8_next_char(aline)))) @@ -2200,7 +2227,7 @@ s=g_utf8_prev_char(aline+lbytes); c1=g_utf8_get_char(s); c2=g_utf8_get_char(g_utf8_prev_char(s)); - if ((c1==CHAR_DQUOTE || CHAR_IS_SQUOTE(c1)) && c2==CHAR_SPACE) + if ((CHAR_IS_DQUOTE(c1) || CHAR_IS_SQUOTE(c1)) && c2==CHAR_SPACE) { if (pswit[ECHO_SWITCH]) g_print("\n%s\n",aline); @@ -2285,15 +2312,17 @@ { const char *s; gunichar c,nc,pc; + QuoteClass qc; c=g_utf8_get_char(aline); nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0; for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s)) { pc=c; c=nc; + qc=CHAR_IS_DQUOTE(c)?QUOTE_CLASS(c):INVALID_QUOTE; nc=g_utf8_get_char(g_utf8_next_char(s)); /* for each character in the line except 1st */ - if (c==CHAR_DQUOTE && isalpha(pc)) + if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) && isalpha(pc)) { if (pswit[ECHO_SWITCH]) g_print("\n%s\n",aline); @@ -2396,6 +2425,7 @@ gboolean letter_on_line=FALSE; const char *s; gunichar c; + gboolean closing_quote; for (s=prevline;*s;s=g_utf8_next_char(s)) if (g_unichar_isalpha(g_utf8_get_char(s))) { @@ -2417,7 +2447,11 @@ { s=g_utf8_prev_char(s); c=g_utf8_get_char(s); - } while (CHAR_IS_CLOSING_QUOTE(c) && c>CHAR_SPACE && s>prevline); + if (QUOTE_CLASS(c)==CLOSING_QUOTE || QUOTE_CLASS(c)==NEUTRAL_QUOTE) + closing_quote=TRUE; + else + closing_quote=FALSE; + } while (closing_quote && s>prevline); for (;s>prevline;s=g_utf8_prev_char(s)) { if (g_unichar_isalpha(g_utf8_get_char(s))) @@ -2548,7 +2582,7 @@ } checked_linecnt++; print_pending(aline,parastart,&pending); - isemptyline=analyse_quotes(aline,&counters); + isemptyline=analyse_quotes(aline,linecnt,&counters); if (isnewpara && !isemptyline) { /* This line is the start of a new paragraph. */