1.1 --- a/bl/Makefile.am Mon Sep 23 21:18:27 2013 +0100
1.2 +++ b/bl/Makefile.am Wed Oct 02 23:51:18 2013 +0100
1.3 @@ -4,4 +4,4 @@
1.4
1.5 noinst_LTLIBRARIES=libbl.la
1.6 libbl_la_SOURCES=bl.h textfileutils.c textfileutils.h spawn.c spawn.h \
1.7 - path.c path.h mkdtemp.c mkdtemp.h print.c print.h
1.8 + path.c path.h mkdtemp.c mkdtemp.h print.c print.h utf8.c utf8.h
2.1 --- a/bl/bl.h Mon Sep 23 21:18:27 2013 +0100
2.2 +++ b/bl/bl.h Wed Oct 02 23:51:18 2013 +0100
2.3 @@ -3,3 +3,4 @@
2.4 #include <bl/path.h>
2.5 #include <bl/mkdtemp.h>
2.6 #include <bl/print.h>
2.7 +#include <bl/utf8.h>
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
3.2 +++ b/bl/utf8.c Wed Oct 02 23:51:18 2013 +0100
3.3 @@ -0,0 +1,24 @@
3.4 +#include <stdlib.h>
3.5 +#include <string.h>
3.6 +#include <glib.h>
3.7 +#include <bl/bl.h>
3.8 +
3.9 +/*
3.10 + * Creates a new string holding length copies of fill_char: length characters,
3.11 + * not bytes. The returned string should be freed when no longer needed.
3.12 + */
3.13 +gchar *utf8_strnfill(gsize length,gunichar fill_char)
3.14 +{
3.15 + gsize n,i; /* n: bytes per encoded fill_char; unsigned so i<length and i*n match length's type */
3.16 + gchar *s;
3.17 + char utf8[6];
3.18 + n=g_unichar_to_utf8(fill_char,utf8);
3.19 + s=g_new(gchar,length*n+1); /* length*n bytes of text plus the terminator */
3.20 + if (n==1)
3.21 + memset(s,utf8[0],length);
3.22 + else
3.23 + for(i=0;i<length;i++)
3.24 + memcpy(s+i*n,utf8,n);
3.25 + s[length*n]='\0';
3.26 + return s;
3.27 +}
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
4.2 +++ b/bl/utf8.h Wed Oct 02 23:51:18 2013 +0100
4.3 @@ -0,0 +1,6 @@
4.4 +#ifndef BL_UTF8_H
4.5 +#define BL_UTF8_H
4.6 +#include <glib.h> /* gchar, gsize, gunichar: keeps this header self-contained */
4.7 +
4.8 +gchar *utf8_strnfill(gsize length,gunichar fill_char);
4.9 +#endif /* BL_UTF8_H */
5.1 --- a/bookloupe/bookloupe.c Mon Sep 23 21:18:27 2013 +0100
5.2 +++ b/bookloupe/bookloupe.c Wed Oct 02 23:51:18 2013 +0100
5.3 @@ -160,8 +160,7 @@
5.4 { NULL }
5.5 };
5.6
5.7 -long cnt_dquot; /* for overview mode, count of doublequote queries */
5.8 -long cnt_squot; /* for overview mode, count of singlequote queries */
5.9 +long cnt_quote; /* for overview mode, count of quote queries */
5.10 long cnt_brack; /* for overview mode, count of brackets queries */
5.11 long cnt_bin; /* for overview mode, count of non-ASCII queries */
5.12 long cnt_odd; /* for overview mode, count of odd character queries */
5.13 @@ -407,10 +406,8 @@
5.14 g_print(" Line-end problems: %14ld\n",cnt_lineend);
5.15 if (cnt_word)
5.16 g_print(" Common typos: %14ld\n",cnt_word);
5.17 - if (cnt_dquot)
5.18 - g_print(" Unmatched quotes: %14ld\n",cnt_dquot);
5.19 - if (cnt_squot)
5.20 - g_print(" Unmatched SingleQuotes: %14ld\n",cnt_squot);
5.21 + if (cnt_quote)
5.22 + g_print(" Unmatched quotes: %14ld\n",cnt_quote);
5.23 if (cnt_brack)
5.24 g_print(" Unmatched brackets: %14ld\n",cnt_brack);
5.25 if (cnt_bin)
5.26 @@ -425,8 +422,8 @@
5.27 g_print(" Possible HTML tags: %14ld\n",cnt_html);
5.28 g_print("\n");
5.29 g_print(" TOTAL QUERIES %14ld\n",
5.30 - cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+
5.31 - cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);
5.32 + cnt_quote+cnt_brack+cnt_bin+cnt_odd+cnt_long+cnt_short+cnt_punct+
5.33 + cnt_dash+cnt_word+cnt_html+cnt_lineend);
5.34 }
5.35 g_free(running_from);
5.36 if (usertypo)
5.37 @@ -453,6 +450,7 @@
5.38 long spline=0,nspline=0;
5.39 static struct first_pass_results results={0};
5.40 gchar *inword;
5.41 + QuoteClass qc;
5.42 lines=g_strsplit(etext,"\n",0);
5.43 for (j=0;lines[j];j++)
5.44 {
5.45 @@ -507,9 +505,16 @@
5.46 results.binlen++;
5.47 if (g_unichar_isalpha(g_utf8_get_char(s)))
5.48 results.alphalen++;
5.49 - if (s>lines[j] && g_utf8_get_char(s)==CHAR_DQUOTE &&
5.50 - isalpha(g_utf8_get_char(g_utf8_prev_char(s))))
5.51 - results.endquote_count++;
5.52 + if (s>lines[j]) /* only when a previous character exists on this line */
5.53 + { /* count closing/neutral double quotes that directly follow a letter */
5.54 + if (CHAR_IS_DQUOTE(g_utf8_get_char(s)))
5.55 + qc=QUOTE_CLASS(g_utf8_get_char(s));
5.56 + else
5.57 + qc=INVALID_QUOTE;
5.58 + if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) && /* <ctype.h> isalpha() is undefined for code points above UCHAR_MAX */
5.59 + g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(s))))
5.60 + results.endquote_count++;
5.61 + }
5.62 }
5.63 if (llen>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE && lastblen>2 &&
5.64 lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)
5.65 @@ -788,7 +793,7 @@
5.66 *
5.67 * Returns: TRUE if the line is empty.
5.68 */
5.69 -gboolean analyse_quotes(const char *aline,struct counters *counters)
5.70 +gboolean analyse_quotes(const char *aline,int linecnt,struct counters *counters)
5.71 {
5.72 int guessquote=0;
5.73 /* assume the line is empty until proven otherwise */
5.74 @@ -796,23 +801,24 @@
5.75 const char *s=aline,*sprev,*snext;
5.76 gunichar c;
5.77 sprev=NULL;
5.78 + GError *tmp_err=NULL;
5.79 while (*s)
5.80 {
5.81 snext=g_utf8_next_char(s);
5.82 c=g_utf8_get_char(s);
5.83 - if (c==CHAR_DQUOTE)
5.84 - counters->quot++;
5.85 - if (CHAR_IS_SQUOTE(c))
5.86 + if (CHAR_IS_DQUOTE(c))
5.87 + (void)count_quote(counters,c,QUOTE_CLASS(c),&tmp_err);
5.88 + else if (CHAR_IS_SQUOTE(c) && pswit[SQUOTE_SWITCH])
5.89 {
5.90 if (s==aline)
5.91 {
5.92 /*
5.93 - * At start of line, it can only be an openquote.
5.94 + * At start of line, it can only be a quotation mark.
5.95 * Hardcode a very common exception!
5.96 */
5.97 if (!g_str_has_prefix(snext,"tis") &&
5.98 !g_str_has_prefix(snext,"Tis"))
5.99 - increment_matching(counters,c,TRUE);
5.100 + (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err);
5.101 }
5.102 else if (g_unichar_isalpha(g_utf8_get_char(sprev)) &&
5.103 g_unichar_isalpha(g_utf8_get_char(snext)))
5.104 @@ -822,15 +828,20 @@
5.105 else if (c==CHAR_OPEN_SQUOTE || c==CHAR_LS_QUOTE ||
5.106 g_unichar_isalpha(g_utf8_get_char(snext)))
5.107 {
5.108 - /* it damwell better BE an openquote */
5.109 + /* certainly looks like a quotation mark */
5.110 if (!g_str_has_prefix(snext,"tis") &&
5.111 !g_str_has_prefix(snext,"Tis"))
5.112 /* hardcode a very common exception! */
5.113 - increment_matching(counters,c,TRUE);
5.114 + { /* after punctuation the mark may open or close, so count it as neutral */
5.115 + if (g_utf8_strchr(".?!,;:",-1,g_utf8_get_char(sprev))) /* strchr() would narrow the code point to char */
5.116 + (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err);
5.117 + else
5.118 + (void)count_quote(counters,c,OPENING_QUOTE,&tmp_err);
5.119 + }
5.120 }
5.121 else
5.122 {
5.123 - /* now - is it a closequote? */
5.124 + /* now - is it a quotation mark? */
5.125 guessquote=0; /* accumulate clues */
5.126 if (g_unichar_isalpha(g_utf8_get_char(sprev)))
5.127 {
5.128 @@ -844,25 +855,31 @@
5.129 /* bonus marks! */
5.130 guessquote-=2;
5.131 }
5.132 + if (innermost_quote_matches(counters,c))
5.133 + /*
5.134 + * Give it the benefit of some doubt,
5.135 + * if a squote is already open.
5.136 + */
5.137 + guessquote++;
5.138 + else
5.139 + guessquote--;
5.140 + if (guessquote>=0)
5.141 + (void)count_quote(counters,c,CLOSING_QUOTE,&tmp_err);
5.142 }
5.143 - /* it doesn't have a letter either side */
5.144 - else if (strchr(".?!,;:",g_utf8_get_char(sprev)) &&
5.145 - strchr(".?!,;: ",g_utf8_get_char(snext)))
5.146 - guessquote+=8; /* looks like a closequote */
5.147 else
5.148 - guessquote++;
5.149 - if (matching_difference(counters,CHAR_SQUOTE)>0)
5.150 - /*
5.151 - * Give it the benefit of some doubt,
5.152 - * if a squote is already open.
5.153 - */
5.154 - guessquote++;
5.155 - else
5.156 - guessquote--;
5.157 - if (guessquote>=0)
5.158 - increment_matching(counters,c,FALSE);
5.159 + /* no adjacent letter - it must be a quote of some kind */
5.160 + (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err);
5.161 }
5.162 }
5.163 + if (tmp_err)
5.164 + { /* report, then clear, the error count_quote() stored for this character */
5.165 + if (pswit[ECHO_SWITCH])
5.166 + g_print("\n%s\n",aline);
5.167 + if (!pswit[OVERVIEW_SWITCH])
5.168 + g_print(" Line %ld column %ld - %s\n", /* linecnt is an int parameter here; %ld requires long */
5.169 + (long)linecnt,g_utf8_pointer_to_offset(aline,s)+1,tmp_err->message);
5.170 + g_clear_error(&tmp_err);
5.171 + }
5.172 if (c!=CHAR_SPACE && c!='-' && c!='.' && c!=CHAR_ASTERISK &&
5.173 c!='\r' && c!='\n')
5.174 isemptyline=FALSE; /* ignore lines like * * * as spacers */
5.175 @@ -1779,6 +1796,7 @@
5.176 gboolean isacro,isellipsis;
5.177 const char *s;
5.178 gunichar c,nc,pc,n2c;
5.179 + int parity;
5.180 c=g_utf8_get_char(aline);
5.181 nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
5.182 for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
5.183 @@ -1917,7 +1935,7 @@
5.184 c=nc;
5.185 nc=g_utf8_get_char(g_utf8_next_char(s));
5.186 /* for each character in the line after the first */
5.187 - if (c==CHAR_DQUOTE)
5.188 + if (CHAR_IS_DQUOTE(c))
5.189 {
5.190 if (!g_utf8_strchr(" _-.'`,;:!/([{?}])",-1,pc) &&
5.191 !g_utf8_strchr(" _-.'`,;:!/([{?}])",-1,nc) && nc ||
5.192 @@ -1939,10 +1957,18 @@
5.193 {
5.194 c=nc;
5.195 nc=g_utf8_get_char(g_utf8_next_char(s));
5.196 - if (c==CHAR_DQUOTE)
5.197 + if (CHAR_IS_DQUOTE(c))
5.198 {
5.199 - parities->dquote=!parities->dquote;
5.200 - if (!parities->dquote)
5.201 + if (c==CHAR_DQUOTE)
5.202 + {
5.203 + parities->dquote=!parities->dquote;
5.204 + parity=parities->dquote;
5.205 + }
5.206 + else if (c==CHAR_LD_QUOTE)
5.207 + parity=1;
5.208 + else
5.209 + parity=0;
5.210 + if (!parity)
5.211 {
5.212 /* parity even */
5.213 if (!g_utf8_strchr("_-.'`/,;:!?)]} ",-1,nc))
5.214 @@ -1975,7 +2001,8 @@
5.215 }
5.216 }
5.217 }
5.218 - if (g_utf8_get_char(aline)==CHAR_DQUOTE)
5.219 + c=g_utf8_get_char(aline);
5.220 + if (CHAR_IS_DQUOTE(c))
5.221 {
5.222 if (g_utf8_strchr(",;:!?)]} ",-1,
5.223 g_utf8_get_char(g_utf8_next_char(aline))))
5.224 @@ -2200,7 +2227,7 @@
5.225 s=g_utf8_prev_char(aline+lbytes);
5.226 c1=g_utf8_get_char(s);
5.227 c2=g_utf8_get_char(g_utf8_prev_char(s));
5.228 - if ((c1==CHAR_DQUOTE || CHAR_IS_SQUOTE(c1)) && c2==CHAR_SPACE)
5.229 + if ((CHAR_IS_DQUOTE(c1) || CHAR_IS_SQUOTE(c1)) && c2==CHAR_SPACE)
5.230 {
5.231 if (pswit[ECHO_SWITCH])
5.232 g_print("\n%s\n",aline);
5.233 @@ -2285,15 +2312,17 @@
5.234 {
5.235 const char *s;
5.236 gunichar c,nc,pc;
5.237 + QuoteClass qc;
5.238 c=g_utf8_get_char(aline);
5.239 nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
5.240 for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
5.241 {
5.242 pc=c;
5.243 c=nc;
5.244 + qc=CHAR_IS_DQUOTE(c)?QUOTE_CLASS(c):INVALID_QUOTE;
5.245 nc=g_utf8_get_char(g_utf8_next_char(s));
5.246 /* for each character in the line except 1st */
5.247 - if (c==CHAR_DQUOTE && isalpha(pc))
5.248 + if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) && isalpha(pc))
5.249 {
5.250 if (pswit[ECHO_SWITCH])
5.251 g_print("\n%s\n",aline);
5.252 @@ -2396,6 +2425,7 @@
5.253 gboolean letter_on_line=FALSE;
5.254 const char *s;
5.255 gunichar c;
5.256 + gboolean closing_quote;
5.257 for (s=prevline;*s;s=g_utf8_next_char(s))
5.258 if (g_unichar_isalpha(g_utf8_get_char(s)))
5.259 {
5.260 @@ -2417,7 +2447,11 @@
5.261 {
5.262 s=g_utf8_prev_char(s);
5.263 c=g_utf8_get_char(s);
5.264 - } while (CHAR_IS_CLOSING_QUOTE(c) && c>CHAR_SPACE && s>prevline);
5.265 + if (QUOTE_CLASS(c)==CLOSING_QUOTE || QUOTE_CLASS(c)==NEUTRAL_QUOTE)
5.266 + closing_quote=TRUE;
5.267 + else
5.268 + closing_quote=FALSE;
5.269 + } while (closing_quote && s>prevline);
5.270 for (;s>prevline;s=g_utf8_prev_char(s))
5.271 {
5.272 if (g_unichar_isalpha(g_utf8_get_char(s)))
5.273 @@ -2548,7 +2582,7 @@
5.274 }
5.275 checked_linecnt++;
5.276 print_pending(aline,parastart,&pending);
5.277 - isemptyline=analyse_quotes(aline,&counters);
5.278 + isemptyline=analyse_quotes(aline,linecnt,&counters);
5.279 if (isnewpara && !isemptyline)
5.280 {
5.281 /* This line is the start of a new paragraph. */
6.1 --- a/bookloupe/bookloupe.h Mon Sep 23 21:18:27 2013 +0100
6.2 +++ b/bookloupe/bookloupe.h Wed Oct 02 23:51:18 2013 +0100
6.3 @@ -24,15 +24,17 @@
6.4
6.5 #define CHAR_LS_QUOTE 0x2018
6.6 #define CHAR_RS_QUOTE 0x2019
6.7 +#define CHAR_LD_QUOTE 0x201C
6.8 +#define CHAR_RD_QUOTE 0x201D
6.9
6.10 #define CHAR_IS_SQUOTE(c) ((c)==CHAR_SQUOTE || (c)==CHAR_OPEN_SQUOTE || \
6.11 (c)==CHAR_LS_QUOTE || (c)==CHAR_RS_QUOTE)
6.12
6.13 +#define CHAR_IS_DQUOTE(c) ((c)==CHAR_DQUOTE || (c)==CHAR_LD_QUOTE || \
6.14 + (c)==CHAR_RD_QUOTE)
6.15 +
6.16 #define CHAR_IS_APOSTROPHE(c) ((c)==CHAR_SQUOTE || (c)==CHAR_RS_QUOTE)
6.17
6.18 -#define CHAR_IS_CLOSING_QUOTE(c) \
6.19 - ((c)==CHAR_DQUOTE || (c)==CHAR_SQUOTE || (c)==CHAR_RS_QUOTE)
6.20 -
6.21 /* longest and shortest normal PG line lengths */
6.22 #define LONGEST_PG_LINE 75
6.23 #define WAY_TOO_LONG 80
6.24 @@ -81,8 +83,8 @@
6.25
6.26 extern gboolean pswit[SWITNO];
6.27
6.28 -extern long cnt_dquot,cnt_squot,cnt_brack,cnt_bin,cnt_odd,cnt_long,cnt_short;
6.29 -extern long cnt_punct,cnt_dash,cnt_word,cnt_html,cnt_lineend,cnt_spacend;
6.30 -extern long linecnt,checked_linecnt;
6.31 +extern long cnt_quote,cnt_brack,cnt_bin,cnt_odd,cnt_long,cnt_short,cnt_punct;
6.32 +extern long cnt_dash,cnt_word,cnt_html,cnt_lineend,cnt_spacend,linecnt;
6.33 +extern long checked_linecnt;
6.34
6.35 #endif /* BOOKOUPE_H */
7.1 --- a/bookloupe/counters.c Mon Sep 23 21:18:27 2013 +0100
7.2 +++ b/bookloupe/counters.c Wed Oct 02 23:51:18 2013 +0100
7.3 @@ -8,6 +8,14 @@
7.4 int open,close;
7.5 };
7.6
7.7 +GQuark counters_error_quark(void) /* quark behind COUNTERS_ERROR */
7.8 +{
7.9 + static GQuark domain=0; /* 0 until the first call has looked it up */
7.10 + if (domain==0)
7.11 + domain=g_quark_from_static_string("counters_error");
7.12 + return domain;
7.13 +}
7.14 +
7.15 static struct matching_counter *matching_counter_new(void)
7.16 {
7.17 return g_slice_new0(struct matching_counter);
7.18 @@ -45,11 +53,64 @@
7.19 return GINT_TO_POINTER((gint)CHAR_SQUOTE);
7.20 else if (ch==CHAR_LS_QUOTE || ch==CHAR_RS_QUOTE)
7.21 return GINT_TO_POINTER((gint)CHAR_LS_QUOTE);
7.22 + else if (ch==CHAR_LD_QUOTE || ch==CHAR_RD_QUOTE)
7.23 + return GINT_TO_POINTER((gint)CHAR_LD_QUOTE);
7.24 + else if (ch==CHAR_DQUOTE)
7.25 + return GINT_TO_POINTER((gint)ch);
7.26 else if (ch<0x4000 || ch-0x4000>=NO_SPECIAL_COUNTERS)
7.27 + g_warning("Matching pair not found for U+%04" G_GINT32_MODIFIER "X",ch);
7.28 + return GINT_TO_POINTER((gint)ch);
7.29 +}
7.30 +
7.31 +gboolean innermost_quote_matches(struct counters *counters,gunichar ch)
7.32 +{
7.33 + /* Key at the front of open_quotes, or NULL when the list is empty */
7.34 + gpointer innermost=NULL;
7.35 + if (counters->open_quotes)
7.36 + innermost=counters->open_quotes->data;
7.37 + /* TRUE when ch maps to that same key */
7.38 + return matching_key(ch)==innermost;
7.39 +}
7.40 +
7.41 +gboolean count_quote(struct counters *counters,gunichar ch,QuoteClass klass,
7.42 + GError **err)
7.43 +{ /* Pushes or pops ch's key on counters->open_quotes according to klass; returns FALSE with err set on a mismatch. */
7.44 + gboolean retval=TRUE;
7.45 + gpointer head; /* key at the front of open_quotes, NULL when the list is empty */
7.46 + if (counters->open_quotes)
7.47 + head=counters->open_quotes->data;
7.48 + else
7.49 + head=NULL;
7.50 + switch(klass)
7.51 {
7.52 - g_warning("Matching pair not found for U+%04" G_GINT32_MODIFIER "X",ch);
7.53 - return GINT_TO_POINTER((gint)ch);
7.54 + case NEUTRAL_QUOTE: /* closes the front entry when its key matches, otherwise opens a new one */
7.55 + if (head!=matching_key(ch))
7.56 + goto opening;
7.57 + /* else fall through */
7.58 + case CLOSING_QUOTE: /* must pair with the front entry; the list is left untouched on error */
7.59 + if (head!=matching_key(ch))
7.60 + {
7.61 + g_set_error(err,COUNTERS_ERROR,COUNTERS_ERROR_FAILED,
7.62 + "Closing quotation mark with no matching open?");
7.63 + retval=FALSE;
7.64 + }
7.65 + else
7.66 + counters->open_quotes=g_slist_delete_link(counters->open_quotes,
7.67 + counters->open_quotes);
7.68 + break;
7.69 + case OPENING_QUOTE: /* an error is set when it repeats the front key, but it is pushed regardless */
7.70 + if (head==matching_key(ch))
7.71 + {
7.72 + g_set_error(err,COUNTERS_ERROR,COUNTERS_ERROR_FAILED,
7.73 + "Directly nested quotation marks of same type?");
7.74 + retval=FALSE;
7.75 + }
7.76 +opening: /* also reached by the goto in the NEUTRAL_QUOTE case */
7.77 + head=matching_key(ch);
7.78 + counters->open_quotes=g_slist_prepend(counters->open_quotes,head);
7.79 + break;
7.80 }
7.81 + return retval;
7.82 }
7.83
7.84 void increment_matching(struct counters *counters,gunichar ch,gboolean open)
8.1 --- a/bookloupe/counters.h Mon Sep 23 21:18:27 2013 +0100
8.2 +++ b/bookloupe/counters.h Wed Oct 02 23:51:18 2013 +0100
8.3 @@ -3,18 +3,42 @@
8.4
8.5 #include <glib.h>
8.6
8.7 +#define COUNTERS_ERROR counters_error_quark()
8.8 +
8.9 +typedef enum
8.10 +{
8.11 + COUNTERS_ERROR_FAILED, /* Generic failure */
8.12 +} CountersError;
8.13 +
8.14 /* Special counters live in the private use area */
8.15 enum {
8.16 COUNTER_ILLUSTRATION=0xE000,
8.17 NO_SPECIAL_COUNTERS
8.18 };
8.19
8.20 +typedef enum {
8.21 + OPENING_QUOTE,
8.22 + CLOSING_QUOTE,
8.23 + NEUTRAL_QUOTE,
8.24 + INVALID_QUOTE
8.25 +} QuoteClass;
8.26 +
8.27 +#define QUOTE_CLASS(c) \
8.28 + (((c)==CHAR_RD_QUOTE || (c)==CHAR_RS_QUOTE)?CLOSING_QUOTE: \
8.29 + ((c)==CHAR_LD_QUOTE || (c)==CHAR_LS_QUOTE || (c)==CHAR_OPEN_SQUOTE)?\
8.30 + OPENING_QUOTE:((c)==CHAR_DQUOTE || (c)==CHAR_SQUOTE)?NEUTRAL_QUOTE:\
8.31 + INVALID_QUOTE)
8.32 +
8.33 struct counters {
8.34 GTree *matching;
8.35 - long quot;
8.36 int c_unders;
8.37 + GSList *open_quotes;
8.38 };
8.39
8.40 +GQuark counters_error_quark(void);
8.41 +gboolean innermost_quote_matches(struct counters *counters,gunichar ch);
8.42 +gboolean count_quote(struct counters *counters,gunichar ch,QuoteClass klass,
8.43 + GError **err);
8.44 void increment_matching(struct counters *counters,gunichar ch,gboolean open);
8.45 int matching_count(const struct counters *counters,gunichar ch,gboolean open);
8.46 int matching_difference(const struct counters *counters,gunichar ch);
9.1 --- a/bookloupe/pending.c Mon Sep 23 21:18:27 2013 +0100
9.2 +++ b/bookloupe/pending.c Wed Oct 02 23:51:18 2013 +0100
9.3 @@ -1,6 +1,7 @@
9.4 #include <stdlib.h>
9.5 #include <string.h>
9.6 #include <glib.h>
9.7 +#include <bl/bl.h>
9.8 #include "bookloupe.h"
9.9 #include "pending.h"
9.10
9.11 @@ -15,20 +16,9 @@
9.12 void print_pending(const char *aline,const char *parastart,
9.13 struct pending *pending)
9.14 {
9.15 - const char *s;
9.16 - gunichar c;
9.17 if (aline)
9.18 - {
9.19 - s=aline;
9.20 - while (*s==' ')
9.21 - s++;
9.22 - c=g_utf8_get_char(s);
9.23 - }
9.24 - else
9.25 - {
9.26 - s=NULL;
9.27 - c='\0';
9.28 - }
9.29 + while (g_unichar_isspace(g_utf8_get_char(aline)))
9.30 + aline=g_utf8_next_char(aline);
9.31 if (pending->illustration.warning_text)
9.32 {
9.33 if (aline)
9.34 @@ -52,38 +42,25 @@
9.35 pending->illustration.queried_line=NULL;
9.36 }
9.37 }
9.38 - if (pending->dquote)
9.39 + if (pending->quote)
9.40 {
9.41 - if (c!=CHAR_DQUOTE || pswit[QPARA_SWITCH])
9.42 + if (!pending->continuing_quote || !aline ||
9.43 + !g_str_has_prefix(aline,pending->continuing_quote))
9.44 {
9.45 if (!pswit[OVERVIEW_SWITCH])
9.46 {
9.47 if (pswit[ECHO_SWITCH])
9.48 g_print("\n%s\n",parastart);
9.49 - g_print("%s\n",pending->dquote);
9.50 + g_print("%s\n",pending->quote);
9.51 }
9.52 else
9.53 - cnt_dquot++;
9.54 + cnt_quote++;
9.55 }
9.56 - g_free(pending->dquote);
9.57 - pending->dquote=NULL;
9.58 + g_free(pending->quote);
9.59 + pending->quote=NULL;
9.60 }
9.61 - if (pending->squote)
9.62 - {
9.63 - if (!CHAR_IS_SQUOTE(c) || pswit[QPARA_SWITCH] || pending->squot)
9.64 - {
9.65 - if (!pswit[OVERVIEW_SWITCH])
9.66 - {
9.67 - if (pswit[ECHO_SWITCH])
9.68 - g_print("\n%s\n",parastart);
9.69 - g_print("%s\n",pending->squote);
9.70 - }
9.71 - else
9.72 - cnt_squot++;
9.73 - }
9.74 - g_free(pending->squote);
9.75 - pending->squote=NULL;
9.76 - }
9.77 + g_free(pending->continuing_quote);
9.78 + pending->continuing_quote=NULL;
9.79 if (pending->rbrack)
9.80 {
9.81 if (!pswit[OVERVIEW_SWITCH])
9.82 @@ -159,34 +136,35 @@
9.83 * quotes on _every_ paragraph, whether the next begins with a
9.84 * quote or not.
9.85 */
9.86 -void check_for_mismatched_quotes(const struct counters *counters,
9.87 +void check_for_mismatched_quotes(struct counters *counters,
9.88 struct pending *pending)
9.89 {
9.90 - int squote_straight,squote_curved,difference;
9.91 - if (counters->quot%2)
9.92 - pending->dquote=
9.93 - g_strdup_printf(" Line %ld - Mismatched quotes",linecnt);
9.94 - if (pswit[SQUOTE_SWITCH])
9.95 + gboolean all_single;
9.96 + gunichar c;
9.97 + int difference;
9.98 + const char *quote_type;
9.99 + GString *str;
9.100 + if (counters->open_quotes)
9.101 {
9.102 - if (matching_count(counters,CHAR_SQUOTE,TRUE))
9.103 - squote_straight=matching_difference(counters,CHAR_SQUOTE);
9.104 + str=g_string_new(NULL);
9.105 + counters->open_quotes=g_slist_reverse(counters->open_quotes);
9.106 + all_single=TRUE;
9.107 + while(counters->open_quotes)
9.108 + {
9.109 + c=GPOINTER_TO_INT(counters->open_quotes->data);
9.110 + if (!CHAR_IS_SQUOTE(c))
9.111 + all_single=FALSE;
9.112 + g_string_append_unichar(str,c);
9.113 + counters->open_quotes=g_slist_delete_link(counters->open_quotes,
9.114 + counters->open_quotes);
9.115 + }
9.116 + pending->continuing_quote=g_string_free(str,FALSE);
9.117 + if (all_single)
9.118 + quote_type="singlequotes?";
9.119 else
9.120 - squote_straight=0;
9.121 - if (matching_count(counters,CHAR_LS_QUOTE,TRUE))
9.122 - squote_curved=matching_difference(counters,CHAR_LS_QUOTE);
9.123 - else
9.124 - squote_curved=0;
9.125 - if (squote_straight || squote_curved)
9.126 - pending->squote=
9.127 - g_strdup_printf(" Line %ld - Mismatched singlequotes?",
9.128 - linecnt);
9.129 - if (squote_straight && squote_straight!=1 ||
9.130 - squote_curved && squote_curved!=1)
9.131 - /*
9.132 - * Flag it to be noted regardless of the
9.133 - * first char of the next para.
9.134 - */
9.135 - pending->squot=1;
9.136 + quote_type="quotes";
9.137 + pending->quote=g_strdup_printf(" Line %ld - Mismatched %s",linecnt,
9.138 + quote_type);
9.139 }
9.140 difference=matching_difference(counters,COUNTER_ILLUSTRATION);
9.141 if (difference)
10.1 --- a/bookloupe/pending.h Mon Sep 23 21:18:27 2013 +0100
10.2 +++ b/bookloupe/pending.h Wed Oct 02 23:51:18 2013 +0100
10.3 @@ -9,15 +9,15 @@
10.4 };
10.5
10.6 struct pending {
10.7 - char *dquote,*squote,*rbrack,*sbrack,*cbrack,*unders;
10.8 - long squot;
10.9 + char *quote,*rbrack,*sbrack,*cbrack,*unders;
10.10 + char *continuing_quote;
10.11 struct pending_warning illustration;
10.12 };
10.13
10.14 void print_pending(const char *aline,const char *parastart,
10.15 struct pending *pending);
10.16 void reset_pending(struct pending *pending);
10.17 -void check_for_mismatched_quotes(const struct counters *counters,
10.18 +void check_for_mismatched_quotes(struct counters *counters,
10.19 struct pending *pending);
10.20
10.21 #endif /* PENDING_H */
11.1 --- a/test/bookloupe/Makefile.am Mon Sep 23 21:18:27 2013 +0100
11.2 +++ b/test/bookloupe/Makefile.am Wed Oct 02 23:51:18 2013 +0100
11.3 @@ -1,5 +1,5 @@
11.4 TESTS_ENVIRONMENT=BOOKLOUPE=../../bookloupe/bookloupe ../harness/loupe-test
11.5 -TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst \
11.6 - curved-genitives.tst multi-line-illustration.tst
11.7 +TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst curved-quotes.tst \
11.8 + runfox-quotes.tst curved-genitives.tst multi-line-illustration.tst
11.9
11.10 dist_pkgdata_DATA=$(TESTS)
12.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
12.2 +++ b/test/bookloupe/curved-quotes.tst Wed Oct 02 23:51:18 2013 +0100
12.3 @@ -0,0 +1,48 @@
12.4 +**************** INPUT ****************
12.5 +When Tom had made fast his motorboat, he went to the rowing craft to
12.6 +see if it was in good condition. He saw a piece of paper on one of the
12.7 +seats, held down by a little stone. Picking it up he read:
12.8 +
12.9 + “Many thanks for the use of your boat. I had a fine row, and
12.10 + I feel better, though I’m as much up a tree as ever. I hope
12.11 + to see you again, sometime. If ever you are near Elmwood Hall,
12.12 + look me up.
12.13 +
12.14 + “BRUCE BENNINGTON.”
12.15 +
12.16 +That was nice of him,” remarked Will, as Tom showed him the note.
12.17 +
12.18 +“And he didn’t damage your boat any," spoke Dick.
12.19 +
12.20 +"No, he knows how to handle ’em--he rows on the Elmwood Hall crew,” said
12.21 +Tom. “Well, so long, fellows. I’m going for a long run to-morrow, if
12.22 +you’d like to come.”
12.23 +
12.24 +“Sure! they chorused.
12.25 +**************** WARNINGS ****************
12.26 +<expected>
12.27 + <error>
12.28 + <at line="12" column="22"/>
12.29 + <text>Closing quotation mark with no matching open?</text>
12.30 + </error>
12.31 + <error>
12.32 + <at line="15"/>
12.33 + <text>Mismatched quotes</text>
12.34 + </error>
12.35 + <error>
12.36 + <at line="16" column="67"/>
12.37 + <text>Closing quotation mark with no matching open?</text>
12.38 + </error>
12.39 + <error>
12.40 + <at line="19"/>
12.41 + <text>Mismatched quotes</text>
12.42 + </error>
12.43 + <error>
12.44 + <at line="21"/>
12.45 + <text>Mismatched quotes</text>
12.46 + </error>
12.47 + <false-positive>
12.48 + <at line="14" column="37"/>
12.49 + <text>Wrongspaced quotes?</text>
12.50 + </false-positive>
12.51 +</expected>
13.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
13.2 +++ b/test/bookloupe/runfox-quotes.tst Wed Oct 02 23:51:18 2013 +0100
13.3 @@ -0,0 +1,17 @@
13.4 +**************** INPUT ****************
13.5 +“Now I see how it happened,” said Spotted Deer. “If you were not very
13.6 +strong you would have been dead. Yes, if you were not a good war-leader
13.7 +you would not have come back here. ”We will not talk any more about it.“
13.8 +
13.9 +“Well, what did you find?” inquired Running Fox.
13.10 +**************** WARNINGS ****************
13.11 +<expected>
13.12 + <error>
13.13 + <at line="3" column="36"/>
13.14 + <text>Wrongspaced quotes?</text>
13.15 + </error>
13.16 + <error>
13.17 + <at line="3" column="72"/>
13.18 + <text>Wrongspaced quotes?</text>
13.19 + </error>
13.20 +</expected>
14.1 --- a/test/compatibility/Makefile.am Mon Sep 23 21:18:27 2013 +0100
14.2 +++ b/test/compatibility/Makefile.am Wed Oct 02 23:51:18 2013 +0100
14.3 @@ -7,6 +7,7 @@
14.4 dashes.tst control-characters.tst unusual-characters.tst \
14.5 windows-1252.tst periods.tst long-line.tst unmarked-paragraph.tst \
14.6 hebe-jeebies.tst mail-from.tst scannos.tst before-comma.tst \
14.7 - before-period.tst double-punctuation.tst genitives.tst embedded-cr.tst
14.8 + before-period.tst double-punctuation.tst genitives.tst embedded-cr.tst \
14.9 + continuing-quotes.tst
14.10
14.11 dist_pkgdata_DATA=$(TESTS)
15.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
15.2 +++ b/test/compatibility/continuing-quotes.tst Wed Oct 02 23:51:18 2013 +0100
15.3 @@ -0,0 +1,14 @@
15.4 +**************** INPUT ****************
15.5 +When Tom had made fast his motorboat, he went to the rowing craft to
15.6 +see if it was in good condition. He saw a piece of paper on one of the
15.7 +seats, held down by a little stone. Picking it up he read:
15.8 +
15.9 + "Many thanks for the use of your boat. I had a fine row, and
15.10 + I feel better, though I'm as much up a tree as ever. I hope
15.11 + to see you again, sometime. If ever you are near Elmwood Hall,
15.12 + look me up.
15.13 +
15.14 + "BRUCE BENNINGTON."
15.15 +
15.16 +"That was nice of him," remarked Will, as Tom showed him the note.
15.17 +**************** EXPECTED ****************