Fix bug #11: Test for balanced "slanted" UTF-8 quotation marks 8220/8221
author ali <ali@juiblex.co.uk>
Wed Oct 02 23:51:18 2013 +0100 (2013-10-02)
changeset 94 466f43a12118
parent 93 9fb13a5dde3b
child 95 d13e2582c2b5
Fix bug #11: Test for balanced "slanted" UTF-8 quotation marks 8220/8221
bl/Makefile.am
bl/bl.h
bl/utf8.c
bl/utf8.h
bookloupe/bookloupe.c
bookloupe/bookloupe.h
bookloupe/counters.c
bookloupe/counters.h
bookloupe/pending.c
bookloupe/pending.h
test/bookloupe/Makefile.am
test/bookloupe/curved-quotes.tst
test/bookloupe/runfox-quotes.tst
test/compatibility/Makefile.am
test/compatibility/continuing-quotes.tst
     1.1 --- a/bl/Makefile.am	Mon Sep 23 21:18:27 2013 +0100
     1.2 +++ b/bl/Makefile.am	Wed Oct 02 23:51:18 2013 +0100
     1.3 @@ -4,4 +4,4 @@
     1.4  
     1.5  noinst_LTLIBRARIES=libbl.la
     1.6  libbl_la_SOURCES=bl.h textfileutils.c textfileutils.h spawn.c spawn.h \
     1.7 -	path.c path.h mkdtemp.c mkdtemp.h print.c print.h
     1.8 +	path.c path.h mkdtemp.c mkdtemp.h print.c print.h utf8.c utf8.h
     2.1 --- a/bl/bl.h	Mon Sep 23 21:18:27 2013 +0100
     2.2 +++ b/bl/bl.h	Wed Oct 02 23:51:18 2013 +0100
     2.3 @@ -3,3 +3,4 @@
     2.4  #include <bl/path.h>
     2.5  #include <bl/mkdtemp.h>
     2.6  #include <bl/print.h>
     2.7 +#include <bl/utf8.h>
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/bl/utf8.c	Wed Oct 02 23:51:18 2013 +0100
     3.3 @@ -0,0 +1,24 @@
     3.4 +#include <stdlib.h>
     3.5 +#include <string.h>
     3.6 +#include <glib.h>
     3.7 +#include <bl/bl.h>
     3.8 +
     3.9 +/*
    3.10 + * Creates a new string length characters long filled with fill_char.
    3.11 + * The returned string should be freed when no longer needed.
    3.12 + */
    3.13 +gchar *utf8_strnfill(gsize length,gunichar fill_char)
    3.14 +{
    3.15 +    int n,i;
    3.16 +    gchar *s;
    3.17 +    char utf8[6];
    3.18 +    n=g_unichar_to_utf8(fill_char,utf8);
    3.19 +    s=g_new(gchar,length*n+1);
    3.20 +    if (n==1)
    3.21 +	memset(s,utf8[0],length);
    3.22 +    else
    3.23 +	for(i=0;i<length;i++)
    3.24 +	    memcpy(s+i*n,utf8,n);
    3.25 +    s[length*n]='\0';
    3.26 +    return s;
    3.27 +}
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/bl/utf8.h	Wed Oct 02 23:51:18 2013 +0100
     4.3 @@ -0,0 +1,6 @@
     4.4 +#ifndef BL_UTF8_H
     4.5 +#define BL_UTF8_H
     4.6 +
     4.7 +gchar *utf8_strnfill(gsize length,gunichar fill_char);
     4.8 +
     4.9 +#endif	/* BL_UTF8_H */
     5.1 --- a/bookloupe/bookloupe.c	Mon Sep 23 21:18:27 2013 +0100
     5.2 +++ b/bookloupe/bookloupe.c	Wed Oct 02 23:51:18 2013 +0100
     5.3 @@ -160,8 +160,7 @@
     5.4      { NULL }
     5.5  };
     5.6  
     5.7 -long cnt_dquot;		/* for overview mode, count of doublequote queries */
     5.8 -long cnt_squot;		/* for overview mode, count of singlequote queries */
     5.9 +long cnt_quote;		/* for overview mode, count of quote queries */
    5.10  long cnt_brack;		/* for overview mode, count of brackets queries */
    5.11  long cnt_bin;		/* for overview mode, count of non-ASCII queries */
    5.12  long cnt_odd;		/* for overview mode, count of odd character queries */
    5.13 @@ -407,10 +406,8 @@
    5.14  	    g_print("    Line-end problems:	     %14ld\n",cnt_lineend);
    5.15  	if (cnt_word)
    5.16  	    g_print("    Common typos:		  %14ld\n",cnt_word);
    5.17 -	if (cnt_dquot)
    5.18 -	    g_print("    Unmatched quotes:	      %14ld\n",cnt_dquot);
    5.19 -	if (cnt_squot)
    5.20 -	    g_print("    Unmatched SingleQuotes:	%14ld\n",cnt_squot);
    5.21 +	if (cnt_quote)
    5.22 +	    g_print("    Unmatched quotes:	      %14ld\n",cnt_quote);
    5.23  	if (cnt_brack)
    5.24  	    g_print("    Unmatched brackets:	    %14ld\n",cnt_brack);
    5.25  	if (cnt_bin)
    5.26 @@ -425,8 +422,8 @@
    5.27  	    g_print("    Possible HTML tags:	    %14ld\n",cnt_html);
    5.28  	g_print("\n");
    5.29  	g_print("    TOTAL QUERIES		  %14ld\n",
    5.30 -	  cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+
    5.31 -	  cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);
    5.32 +	  cnt_quote+cnt_brack+cnt_bin+cnt_odd+cnt_long+cnt_short+cnt_punct+
    5.33 +	  cnt_dash+cnt_word+cnt_html+cnt_lineend);
    5.34      }
    5.35      g_free(running_from);
    5.36      if (usertypo)
    5.37 @@ -453,6 +450,7 @@
    5.38      long spline=0,nspline=0;
    5.39      static struct first_pass_results results={0};
    5.40      gchar *inword;
    5.41 +    QuoteClass qc;
    5.42      lines=g_strsplit(etext,"\n",0);
    5.43      for (j=0;lines[j];j++)
    5.44      {
    5.45 @@ -507,9 +505,16 @@
    5.46  		results.binlen++;
    5.47  	    if (g_unichar_isalpha(g_utf8_get_char(s)))
    5.48  		results.alphalen++;
    5.49 -	    if (s>lines[j] && g_utf8_get_char(s)==CHAR_DQUOTE &&
    5.50 -	      isalpha(g_utf8_get_char(g_utf8_prev_char(s))))
    5.51 -		results.endquote_count++;
    5.52 +	    if (s>lines[j])
    5.53 +	    {
    5.54 +		if (CHAR_IS_DQUOTE(g_utf8_get_char(s)))
    5.55 +		    qc=QUOTE_CLASS(g_utf8_get_char(s));
    5.56 +		else
    5.57 +		    qc=INVALID_QUOTE;
    5.58 +		if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) &&
    5.59 +		  isalpha(g_utf8_get_char(g_utf8_prev_char(s))))
    5.60 +		    results.endquote_count++;
    5.61 +	    }
    5.62  	}
    5.63  	if (llen>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE && lastblen>2 &&
    5.64  	  lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)
    5.65 @@ -788,7 +793,7 @@
    5.66   *
    5.67   * Returns: TRUE if the line is empty.
    5.68   */
    5.69 -gboolean analyse_quotes(const char *aline,struct counters *counters)
    5.70 +gboolean analyse_quotes(const char *aline,int linecnt,struct counters *counters)
    5.71  {
    5.72      int guessquote=0;
    5.73      /* assume the line is empty until proven otherwise */
    5.74 @@ -796,23 +801,24 @@
    5.75      const char *s=aline,*sprev,*snext;
    5.76      gunichar c;
    5.77      sprev=NULL;
    5.78 +    GError *tmp_err=NULL;
    5.79      while (*s)
    5.80      {
    5.81  	snext=g_utf8_next_char(s);
    5.82  	c=g_utf8_get_char(s);
    5.83 -	if (c==CHAR_DQUOTE)
    5.84 -	    counters->quot++;
    5.85 -	if (CHAR_IS_SQUOTE(c))
    5.86 +	if (CHAR_IS_DQUOTE(c))
    5.87 +	    (void)count_quote(counters,c,QUOTE_CLASS(c),&tmp_err);
    5.88 +	else if (CHAR_IS_SQUOTE(c) && pswit[SQUOTE_SWITCH])
    5.89  	{
    5.90  	    if (s==aline)
    5.91  	    {
    5.92  		/*
    5.93 -		 * At start of line, it can only be an openquote.
    5.94 +		 * At start of line, it can only be a quotation mark.
    5.95  		 * Hardcode a very common exception!
    5.96  		 */
    5.97  		if (!g_str_has_prefix(snext,"tis") &&
    5.98  		  !g_str_has_prefix(snext,"Tis"))
    5.99 -		    increment_matching(counters,c,TRUE);
   5.100 +		    (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err);
   5.101  	    }
   5.102  	    else if (g_unichar_isalpha(g_utf8_get_char(sprev)) &&
   5.103  	      g_unichar_isalpha(g_utf8_get_char(snext)))
   5.104 @@ -822,15 +828,20 @@
   5.105  	    else if (c==CHAR_OPEN_SQUOTE || c==CHAR_LS_QUOTE ||
   5.106  	      g_unichar_isalpha(g_utf8_get_char(snext)))
   5.107  	    {
   5.108 -		/* it damwell better BE an openquote */
   5.109 +		/* certainly looks like a quotation mark */
   5.110  		if (!g_str_has_prefix(snext,"tis") &&
   5.111  		  !g_str_has_prefix(snext,"Tis"))
   5.112  		    /* hardcode a very common exception! */
   5.113 -		    increment_matching(counters,c,TRUE);
   5.114 +		{
   5.115 +		    if (strchr(".?!,;:",g_utf8_get_char(sprev)))
   5.116 +			(void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err);
   5.117 +		    else
   5.118 +			(void)count_quote(counters,c,OPENING_QUOTE,&tmp_err);
   5.119 +		}
   5.120  	    }
   5.121  	    else
   5.122  	    {
   5.123 -		/* now - is it a closequote? */
   5.124 +		/* now - is it a quotation mark? */
   5.125  		guessquote=0;   /* accumulate clues */
   5.126  		if (g_unichar_isalpha(g_utf8_get_char(sprev)))
   5.127  		{
   5.128 @@ -844,25 +855,31 @@
   5.129  			    /* bonus marks! */
   5.130  			    guessquote-=2;
   5.131  		    }
   5.132 +		    if (innermost_quote_matches(counters,c))
   5.133 +			/*
   5.134 +			 * Give it the benefit of some doubt,
   5.135 +			 * if a squote is already open.
   5.136 +			 */
   5.137 +			guessquote++;
   5.138 +		    else
   5.139 +			guessquote--;
   5.140 +		    if (guessquote>=0)
   5.141 +			(void)count_quote(counters,c,CLOSING_QUOTE,&tmp_err);
   5.142  		}
   5.143 -		/* it doesn't have a letter either side */
   5.144 -		else if (strchr(".?!,;:",g_utf8_get_char(sprev)) &&
   5.145 -		  strchr(".?!,;: ",g_utf8_get_char(snext)))
   5.146 -		    guessquote+=8; /* looks like a closequote */
   5.147  		else
   5.148 -		    guessquote++;
   5.149 -		if (matching_difference(counters,CHAR_SQUOTE)>0)
   5.150 -		    /*
   5.151 -		     * Give it the benefit of some doubt,
   5.152 -		     * if a squote is already open.
   5.153 -		     */
   5.154 -		    guessquote++;
   5.155 -		else
   5.156 -		    guessquote--;
   5.157 -		if (guessquote>=0)
   5.158 -		    increment_matching(counters,c,FALSE);
   5.159 +		    /* no adjacent letter - it must be a quote of some kind */
   5.160 +		    (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err);
   5.161  	    }
   5.162  	}
   5.163 +	if (tmp_err)
   5.164 +	{
   5.165 +	    if (pswit[ECHO_SWITCH])
   5.166 +		g_print("\n%s\n",aline);
   5.167 +	    if (!pswit[OVERVIEW_SWITCH])
   5.168 +		g_print("    Line %ld column %ld - %s\n",
   5.169 +		  linecnt,g_utf8_pointer_to_offset(aline,s)+1,tmp_err->message);
   5.170 +	    g_clear_error(&tmp_err);
   5.171 +	}
   5.172  	if (c!=CHAR_SPACE && c!='-' && c!='.' && c!=CHAR_ASTERISK &&
   5.173  	  c!='\r' && c!='\n')
   5.174  	    isemptyline=FALSE;  /* ignore lines like  *  *  *  as spacers */
   5.175 @@ -1779,6 +1796,7 @@
   5.176      gboolean isacro,isellipsis;
   5.177      const char *s;
   5.178      gunichar c,nc,pc,n2c;
   5.179 +    int parity;
   5.180      c=g_utf8_get_char(aline);
   5.181      nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
   5.182      for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
   5.183 @@ -1917,7 +1935,7 @@
   5.184  	c=nc;
   5.185  	nc=g_utf8_get_char(g_utf8_next_char(s));
   5.186  	/* for each character in the line after the first */
   5.187 -	if (c==CHAR_DQUOTE)
   5.188 +	if (CHAR_IS_DQUOTE(c))
   5.189  	{
   5.190  	    if (!g_utf8_strchr(" _-.'`,;:!/([{?}])",-1,pc) &&
   5.191  	      !g_utf8_strchr(" _-.'`,;:!/([{?}])",-1,nc) && nc ||
   5.192 @@ -1939,10 +1957,18 @@
   5.193      {
   5.194  	c=nc;
   5.195  	nc=g_utf8_get_char(g_utf8_next_char(s));
   5.196 -	if (c==CHAR_DQUOTE)
   5.197 +	if (CHAR_IS_DQUOTE(c))
   5.198  	{
   5.199 -	    parities->dquote=!parities->dquote;
   5.200 -	    if (!parities->dquote)
   5.201 +	    if (c==CHAR_DQUOTE)
   5.202 +	    {
   5.203 +		parities->dquote=!parities->dquote;
   5.204 +		parity=parities->dquote;
   5.205 +	    }
   5.206 +	    else if (c==CHAR_LD_QUOTE)
   5.207 +		parity=1;
   5.208 +	    else
   5.209 +		parity=0;
   5.210 +	    if (!parity)
   5.211  	    {
   5.212  		/* parity even */
   5.213  		if (!g_utf8_strchr("_-.'`/,;:!?)]} ",-1,nc))
   5.214 @@ -1975,7 +2001,8 @@
   5.215  	    }
   5.216  	}
   5.217      }
   5.218 -    if (g_utf8_get_char(aline)==CHAR_DQUOTE)
   5.219 +    c=g_utf8_get_char(aline);
   5.220 +    if (CHAR_IS_DQUOTE(c))
   5.221      {
   5.222  	if (g_utf8_strchr(",;:!?)]} ",-1,
   5.223  	  g_utf8_get_char(g_utf8_next_char(aline))))
   5.224 @@ -2200,7 +2227,7 @@
   5.225  	s=g_utf8_prev_char(aline+lbytes);
   5.226  	c1=g_utf8_get_char(s);
   5.227  	c2=g_utf8_get_char(g_utf8_prev_char(s));
   5.228 -	if ((c1==CHAR_DQUOTE || CHAR_IS_SQUOTE(c1)) && c2==CHAR_SPACE)
   5.229 +	if ((CHAR_IS_DQUOTE(c1) || CHAR_IS_SQUOTE(c1)) && c2==CHAR_SPACE)
   5.230  	{
   5.231  	    if (pswit[ECHO_SWITCH])
   5.232  		g_print("\n%s\n",aline);
   5.233 @@ -2285,15 +2312,17 @@
   5.234  {
   5.235      const char *s;
   5.236      gunichar c,nc,pc;
   5.237 +    QuoteClass qc;
   5.238      c=g_utf8_get_char(aline);
   5.239      nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
   5.240      for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
   5.241      {
   5.242  	pc=c;
   5.243  	c=nc;
   5.244 +	qc=CHAR_IS_DQUOTE(c)?QUOTE_CLASS(c):INVALID_QUOTE;
   5.245  	nc=g_utf8_get_char(g_utf8_next_char(s));
   5.246  	/* for each character in the line except 1st */
   5.247 -	if (c==CHAR_DQUOTE && isalpha(pc))
   5.248 +	if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) && isalpha(pc))
   5.249  	{
   5.250  	    if (pswit[ECHO_SWITCH])
   5.251  		g_print("\n%s\n",aline);
   5.252 @@ -2396,6 +2425,7 @@
   5.253      gboolean letter_on_line=FALSE;
   5.254      const char *s;
   5.255      gunichar c;
   5.256 +    gboolean closing_quote;
   5.257      for (s=prevline;*s;s=g_utf8_next_char(s))
   5.258  	if (g_unichar_isalpha(g_utf8_get_char(s)))
   5.259  	{
   5.260 @@ -2417,7 +2447,11 @@
   5.261  	{
   5.262  	    s=g_utf8_prev_char(s);
   5.263  	    c=g_utf8_get_char(s);
   5.264 -	} while (CHAR_IS_CLOSING_QUOTE(c) && c>CHAR_SPACE && s>prevline);
   5.265 +	    if (QUOTE_CLASS(c)==CLOSING_QUOTE || QUOTE_CLASS(c)==NEUTRAL_QUOTE)
   5.266 +		closing_quote=TRUE;
   5.267 +	    else
   5.268 +		closing_quote=FALSE;
   5.269 +	} while (closing_quote && s>prevline);
   5.270  	for (;s>prevline;s=g_utf8_prev_char(s))
   5.271  	{
   5.272  	    if (g_unichar_isalpha(g_utf8_get_char(s)))
   5.273 @@ -2548,7 +2582,7 @@
   5.274  	}
   5.275  	checked_linecnt++;
   5.276  	print_pending(aline,parastart,&pending);
   5.277 -	isemptyline=analyse_quotes(aline,&counters);
   5.278 +	isemptyline=analyse_quotes(aline,linecnt,&counters);
   5.279  	if (isnewpara && !isemptyline)
   5.280  	{
   5.281  	    /* This line is the start of a new paragraph. */
     6.1 --- a/bookloupe/bookloupe.h	Mon Sep 23 21:18:27 2013 +0100
     6.2 +++ b/bookloupe/bookloupe.h	Wed Oct 02 23:51:18 2013 +0100
     6.3 @@ -24,15 +24,17 @@
     6.4  
     6.5  #define CHAR_LS_QUOTE	  0x2018
     6.6  #define CHAR_RS_QUOTE	  0x2019
     6.7 +#define CHAR_LD_QUOTE	  0x201C
     6.8 +#define CHAR_RD_QUOTE	  0x201D
     6.9  
    6.10  #define CHAR_IS_SQUOTE(c)	((c)==CHAR_SQUOTE || (c)==CHAR_OPEN_SQUOTE || \
    6.11  				 (c)==CHAR_LS_QUOTE || (c)==CHAR_RS_QUOTE)
    6.12  
    6.13 +#define CHAR_IS_DQUOTE(c)	((c)==CHAR_DQUOTE || (c)==CHAR_LD_QUOTE || \
    6.14 +				 (c)==CHAR_RD_QUOTE)
    6.15 +
    6.16  #define CHAR_IS_APOSTROPHE(c)	((c)==CHAR_SQUOTE || (c)==CHAR_RS_QUOTE)
    6.17  
    6.18 -#define CHAR_IS_CLOSING_QUOTE(c) \
    6.19 -    ((c)==CHAR_DQUOTE || (c)==CHAR_SQUOTE || (c)==CHAR_RS_QUOTE)
    6.20 -
    6.21  /* longest and shortest normal PG line lengths */
    6.22  #define LONGEST_PG_LINE   75
    6.23  #define WAY_TOO_LONG      80
    6.24 @@ -81,8 +83,8 @@
    6.25  
    6.26  extern gboolean pswit[SWITNO];
    6.27  
    6.28 -extern long cnt_dquot,cnt_squot,cnt_brack,cnt_bin,cnt_odd,cnt_long,cnt_short;
    6.29 -extern long cnt_punct,cnt_dash,cnt_word,cnt_html,cnt_lineend,cnt_spacend;
    6.30 -extern long linecnt,checked_linecnt;
    6.31 +extern long cnt_quote,cnt_brack,cnt_bin,cnt_odd,cnt_long,cnt_short,cnt_punct;
    6.32 +extern long cnt_dash,cnt_word,cnt_html,cnt_lineend,cnt_spacend,linecnt;
    6.33 +extern long checked_linecnt;
    6.34  
    6.35  #endif /* BOOKOUPE_H */
     7.1 --- a/bookloupe/counters.c	Mon Sep 23 21:18:27 2013 +0100
     7.2 +++ b/bookloupe/counters.c	Wed Oct 02 23:51:18 2013 +0100
     7.3 @@ -8,6 +8,14 @@
     7.4      int open,close;
     7.5  };
     7.6  
     7.7 +GQuark counters_error_quark(void)
     7.8 +{
     7.9 +    static GQuark quark;
    7.10 +    if (!quark)
    7.11 +	quark=g_quark_from_static_string("counters_error");
    7.12 +    return quark;
    7.13 +}
    7.14 +
    7.15  static struct matching_counter *matching_counter_new(void)
    7.16  {
    7.17      return g_slice_new0(struct matching_counter);
    7.18 @@ -45,11 +53,64 @@
    7.19  	return GINT_TO_POINTER((gint)CHAR_SQUOTE);
    7.20      else if (ch==CHAR_LS_QUOTE || ch==CHAR_RS_QUOTE)
    7.21  	return GINT_TO_POINTER((gint)CHAR_LS_QUOTE);
    7.22 +    else if (ch==CHAR_LD_QUOTE || ch==CHAR_RD_QUOTE)
    7.23 +	return GINT_TO_POINTER((gint)CHAR_LD_QUOTE);
    7.24 +    else if (ch==CHAR_DQUOTE)
    7.25 +	return GINT_TO_POINTER((gint)ch);
    7.26      else if (ch<0x4000 || ch-0x4000>=NO_SPECIAL_COUNTERS)
    7.27 +	g_warning("Matching pair not found for U+%04" G_GINT32_MODIFIER "X",ch);
    7.28 +    return GINT_TO_POINTER((gint)ch);
    7.29 +}
    7.30 +
    7.31 +gboolean innermost_quote_matches(struct counters *counters,gunichar ch)
    7.32 +{
    7.33 +    gpointer head;
    7.34 +    if (counters->open_quotes)
    7.35 +	head=counters->open_quotes->data;
    7.36 +    else
    7.37 +	head=NULL;
    7.38 +    return head==matching_key(ch);
    7.39 +}
    7.40 +
    7.41 +gboolean count_quote(struct counters *counters,gunichar ch,QuoteClass klass,
    7.42 +  GError **err)
    7.43 +{
    7.44 +    gboolean retval=TRUE;
    7.45 +    gpointer head;
    7.46 +    if (counters->open_quotes)
    7.47 +	head=counters->open_quotes->data;
    7.48 +    else
    7.49 +	head=NULL;
    7.50 +    switch(klass)
    7.51      {
    7.52 -	g_warning("Matching pair not found for U+%04" G_GINT32_MODIFIER "X",ch);
    7.53 -	return GINT_TO_POINTER((gint)ch);
    7.54 +	case NEUTRAL_QUOTE:
    7.55 +	    if (head!=matching_key(ch))
    7.56 +		goto opening;
    7.57 +	    /* else fall through */
    7.58 +	case CLOSING_QUOTE:
    7.59 +	    if (head!=matching_key(ch))
    7.60 +	    {
    7.61 +		g_set_error(err,COUNTERS_ERROR,COUNTERS_ERROR_FAILED,
    7.62 +		  "Closing quotation mark with no matching open?");
    7.63 +		retval=FALSE;
    7.64 +	    }
    7.65 +	    else
    7.66 +		counters->open_quotes=g_slist_delete_link(counters->open_quotes,
    7.67 +		  counters->open_quotes);
    7.68 +	    break;
    7.69 +	case OPENING_QUOTE:
    7.70 +	    if (head==matching_key(ch))
    7.71 +	    {
    7.72 +		g_set_error(err,COUNTERS_ERROR,COUNTERS_ERROR_FAILED,
    7.73 +		  "Directly nested quotation marks of same type?");
    7.74 +		retval=FALSE;
    7.75 +	    }
    7.76 +opening:
    7.77 +	    head=matching_key(ch);
    7.78 +	    counters->open_quotes=g_slist_prepend(counters->open_quotes,head);
    7.79 +	    break;
    7.80      }
    7.81 +    return retval;
    7.82  }
    7.83  
    7.84  void increment_matching(struct counters *counters,gunichar ch,gboolean open)
     8.1 --- a/bookloupe/counters.h	Mon Sep 23 21:18:27 2013 +0100
     8.2 +++ b/bookloupe/counters.h	Wed Oct 02 23:51:18 2013 +0100
     8.3 @@ -3,18 +3,42 @@
     8.4  
     8.5  #include <glib.h>
     8.6  
     8.7 +#define COUNTERS_ERROR counters_error_quark()
     8.8 +
     8.9 +typedef enum
    8.10 +{
    8.11 +    COUNTERS_ERROR_FAILED,                 /* Generic failure */
    8.12 +} CountersError;
    8.13 +
    8.14  /* Special counters live in the private use area */
    8.15  enum {
    8.16      COUNTER_ILLUSTRATION=0xE000,
    8.17      NO_SPECIAL_COUNTERS
    8.18  };
    8.19  
    8.20 +typedef enum {
    8.21 +    OPENING_QUOTE,
    8.22 +    CLOSING_QUOTE,
    8.23 +    NEUTRAL_QUOTE,
    8.24 +    INVALID_QUOTE
    8.25 +} QuoteClass;
    8.26 +
    8.27 +#define QUOTE_CLASS(c) \
    8.28 +    (((c)==CHAR_RD_QUOTE || (c)==CHAR_RS_QUOTE)?CLOSING_QUOTE: \
    8.29 +     ((c)==CHAR_LD_QUOTE || (c)==CHAR_LS_QUOTE || (c)==CHAR_OPEN_SQUOTE)?\
    8.30 +     OPENING_QUOTE:((c)==CHAR_DQUOTE || (c)==CHAR_SQUOTE)?NEUTRAL_QUOTE:\
    8.31 +     INVALID_QUOTE)
    8.32 +
    8.33  struct counters {
    8.34      GTree *matching;
    8.35 -    long quot;
    8.36      int c_unders;
    8.37 +    GSList *open_quotes;
    8.38  };
    8.39  
    8.40 +GQuark counters_error_quark(void);
    8.41 +gboolean innermost_quote_matches(struct counters *counters,gunichar ch);
    8.42 +gboolean count_quote(struct counters *counters,gunichar ch,QuoteClass klass,
    8.43 +  GError **err);
    8.44  void increment_matching(struct counters *counters,gunichar ch,gboolean open);
    8.45  int matching_count(const struct counters *counters,gunichar ch,gboolean open);
    8.46  int matching_difference(const struct counters *counters,gunichar ch);
     9.1 --- a/bookloupe/pending.c	Mon Sep 23 21:18:27 2013 +0100
     9.2 +++ b/bookloupe/pending.c	Wed Oct 02 23:51:18 2013 +0100
     9.3 @@ -1,6 +1,7 @@
     9.4  #include <stdlib.h>
     9.5  #include <string.h>
     9.6  #include <glib.h>
     9.7 +#include <bl/bl.h>
     9.8  #include "bookloupe.h"
     9.9  #include "pending.h"
    9.10  
    9.11 @@ -15,20 +16,9 @@
    9.12  void print_pending(const char *aline,const char *parastart,
    9.13    struct pending *pending)
    9.14  {
    9.15 -    const char *s;
    9.16 -    gunichar c;
    9.17      if (aline)
    9.18 -    {
    9.19 -	s=aline;
    9.20 -	while (*s==' ')
    9.21 -	    s++;
    9.22 -	c=g_utf8_get_char(s);
    9.23 -    }
    9.24 -    else
    9.25 -    {
    9.26 -	s=NULL;
    9.27 -	c='\0';
    9.28 -    }
    9.29 +	while (g_unichar_isspace(g_utf8_get_char(aline)))
    9.30 +	    aline=g_utf8_next_char(aline);
    9.31      if (pending->illustration.warning_text)
    9.32      {
    9.33  	if (aline)
    9.34 @@ -52,38 +42,25 @@
    9.35  	    pending->illustration.queried_line=NULL;
    9.36  	}
    9.37      }
    9.38 -    if (pending->dquote)
    9.39 +    if (pending->quote)
    9.40      {
    9.41 -	if (c!=CHAR_DQUOTE || pswit[QPARA_SWITCH])
    9.42 +	if (!pending->continuing_quote || !aline ||
    9.43 +	  !g_str_has_prefix(aline,pending->continuing_quote))
    9.44  	{
    9.45  	    if (!pswit[OVERVIEW_SWITCH])
    9.46  	    {
    9.47  		if (pswit[ECHO_SWITCH])
    9.48  		    g_print("\n%s\n",parastart);
    9.49 -		g_print("%s\n",pending->dquote);
    9.50 +		g_print("%s\n",pending->quote);
    9.51  	    }
    9.52  	    else
    9.53 -		cnt_dquot++;
    9.54 +		cnt_quote++;
    9.55  	}
    9.56 -	g_free(pending->dquote);
    9.57 -	pending->dquote=NULL;
    9.58 +	g_free(pending->quote);
    9.59 +	pending->quote=NULL;
    9.60      }
    9.61 -    if (pending->squote)
    9.62 -    {
    9.63 -	if (!CHAR_IS_SQUOTE(c) || pswit[QPARA_SWITCH] || pending->squot)
    9.64 -	{
    9.65 -	    if (!pswit[OVERVIEW_SWITCH])
    9.66 -	    {
    9.67 -		if (pswit[ECHO_SWITCH])
    9.68 -		    g_print("\n%s\n",parastart);
    9.69 -		g_print("%s\n",pending->squote);
    9.70 -	    }
    9.71 -	    else
    9.72 -		cnt_squot++;
    9.73 -	}
    9.74 -	g_free(pending->squote);
    9.75 -	pending->squote=NULL;
    9.76 -    }
    9.77 +    g_free(pending->continuing_quote);
    9.78 +    pending->continuing_quote=NULL;
    9.79      if (pending->rbrack)
    9.80      {
    9.81  	if (!pswit[OVERVIEW_SWITCH])
    9.82 @@ -159,34 +136,35 @@
    9.83   * quotes on _every_ paragraph, whether the next begins with a
    9.84   * quote or not.
    9.85   */
    9.86 -void check_for_mismatched_quotes(const struct counters *counters,
    9.87 +void check_for_mismatched_quotes(struct counters *counters,
    9.88    struct pending *pending)
    9.89  {
    9.90 -    int squote_straight,squote_curved,difference;
    9.91 -    if (counters->quot%2)
    9.92 -	pending->dquote=
    9.93 -	  g_strdup_printf("    Line %ld - Mismatched quotes",linecnt);
    9.94 -    if (pswit[SQUOTE_SWITCH])
    9.95 +    gboolean all_single;
    9.96 +    gunichar c;
    9.97 +    int difference;
    9.98 +    const char *quote_type;
    9.99 +    GString *str;
   9.100 +    if (counters->open_quotes)
   9.101      {
   9.102 -	if (matching_count(counters,CHAR_SQUOTE,TRUE))
   9.103 -	    squote_straight=matching_difference(counters,CHAR_SQUOTE);
   9.104 +	str=g_string_new(NULL);
   9.105 +	counters->open_quotes=g_slist_reverse(counters->open_quotes);
   9.106 +	all_single=TRUE;
   9.107 +	while(counters->open_quotes)
   9.108 +	{
   9.109 +	    c=GPOINTER_TO_INT(counters->open_quotes->data);
   9.110 +	    if (!CHAR_IS_SQUOTE(c))
   9.111 +		all_single=FALSE;
   9.112 +	    g_string_append_unichar(str,c);
   9.113 +	    counters->open_quotes=g_slist_delete_link(counters->open_quotes,
   9.114 +	      counters->open_quotes);
   9.115 +	}
   9.116 +	pending->continuing_quote=g_string_free(str,FALSE);
   9.117 +	if (all_single)
   9.118 +	    quote_type="singlequotes?";
   9.119  	else
   9.120 -	    squote_straight=0;
   9.121 -	if (matching_count(counters,CHAR_LS_QUOTE,TRUE))
   9.122 -	    squote_curved=matching_difference(counters,CHAR_LS_QUOTE);
   9.123 -	else
   9.124 -	    squote_curved=0;
   9.125 -	if (squote_straight || squote_curved)
   9.126 -	    pending->squote=
   9.127 -	      g_strdup_printf("    Line %ld - Mismatched singlequotes?",
   9.128 -	      linecnt);
   9.129 -	if (squote_straight && squote_straight!=1 ||
   9.130 -	  squote_curved && squote_curved!=1)
   9.131 -	    /*
   9.132 -	     * Flag it to be noted regardless of the
   9.133 -	     * first char of the next para.
   9.134 -	     */
   9.135 -	    pending->squot=1;
   9.136 +	    quote_type="quotes";
   9.137 +	pending->quote=g_strdup_printf("    Line %ld - Mismatched %s",linecnt,
   9.138 +	  quote_type);
   9.139      }
   9.140      difference=matching_difference(counters,COUNTER_ILLUSTRATION);
   9.141      if (difference)
    10.1 --- a/bookloupe/pending.h	Mon Sep 23 21:18:27 2013 +0100
    10.2 +++ b/bookloupe/pending.h	Wed Oct 02 23:51:18 2013 +0100
    10.3 @@ -9,15 +9,15 @@
    10.4  };
    10.5  
    10.6  struct pending {
    10.7 -    char *dquote,*squote,*rbrack,*sbrack,*cbrack,*unders;
    10.8 -    long squot;
    10.9 +    char *quote,*rbrack,*sbrack,*cbrack,*unders;
   10.10 +    char *continuing_quote;
   10.11      struct pending_warning illustration;
   10.12  };
   10.13  
   10.14  void print_pending(const char *aline,const char *parastart,
   10.15    struct pending *pending);
   10.16  void reset_pending(struct pending *pending);
   10.17 -void check_for_mismatched_quotes(const struct counters *counters,
   10.18 +void check_for_mismatched_quotes(struct counters *counters,
   10.19    struct pending *pending);
   10.20  
   10.21  #endif /* PENDING_H */
    11.1 --- a/test/bookloupe/Makefile.am	Mon Sep 23 21:18:27 2013 +0100
    11.2 +++ b/test/bookloupe/Makefile.am	Wed Oct 02 23:51:18 2013 +0100
    11.3 @@ -1,5 +1,5 @@
    11.4  TESTS_ENVIRONMENT=BOOKLOUPE=../../bookloupe/bookloupe ../harness/loupe-test
    11.5 -TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst \
    11.6 -	curved-genitives.tst multi-line-illustration.tst
    11.7 +TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst curved-quotes.tst \
    11.8 +	runfox-quotes.tst curved-genitives.tst multi-line-illustration.tst
    11.9  
   11.10  dist_pkgdata_DATA=$(TESTS)
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/test/bookloupe/curved-quotes.tst	Wed Oct 02 23:51:18 2013 +0100
    12.3 @@ -0,0 +1,48 @@
    12.4 +**************** INPUT ****************
    12.5 +When Tom had made fast his motorboat, he went to the rowing craft to
    12.6 +see if it was in good condition. He saw a piece of paper on one of the
    12.7 +seats, held down by a little stone. Picking it up he read:
    12.8 +
    12.9 +     “Many thanks for the use of your boat. I had a fine row, and
   12.10 +     I feel better, though I’m as much up a tree as ever. I hope
   12.11 +     to see you again, sometime. If ever you are near Elmwood Hall,
   12.12 +     look me up.
   12.13 +
   12.14 +     “BRUCE BENNINGTON.”
   12.15 +
   12.16 +That was nice of him,” remarked Will, as Tom showed him the note.
   12.17 +
   12.18 +“And he didn’t damage your boat any," spoke Dick.
   12.19 +
   12.20 +"No, he knows how to handle ’em--he rows on the Elmwood Hall crew,” said
   12.21 +Tom. “Well, so long, fellows. I’m going for a long run to-morrow, if
   12.22 +you’d like to come.”
   12.23 +
   12.24 +“Sure! they chorused.
   12.25 +**************** WARNINGS ****************
   12.26 +<expected>
   12.27 +  <error>
   12.28 +    <at line="12" column="22"/>
   12.29 +    <text>Closing quotation mark with no matching open?</text>
   12.30 +  </error>
   12.31 +  <error>
   12.32 +    <at line="15"/>
   12.33 +    <text>Mismatched quotes</text>
   12.34 +  </error>
   12.35 +  <error>
   12.36 +    <at line="16" column="67"/>
   12.37 +    <text>Closing quotation mark with no matching open?</text>
   12.38 +  </error>
   12.39 +  <error>
   12.40 +    <at line="19"/>
   12.41 +    <text>Mismatched quotes</text>
   12.42 +  </error>
   12.43 +  <error>
   12.44 +    <at line="21"/>
   12.45 +    <text>Mismatched quotes</text>
   12.46 +  </error>
   12.47 +  <false-positive>
   12.48 +    <at line="14" column="37"/>
   12.49 +    <text>Wrongspaced quotes?</text>
   12.50 +  </false-positive>
   12.51 +</expected>
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/test/bookloupe/runfox-quotes.tst	Wed Oct 02 23:51:18 2013 +0100
    13.3 @@ -0,0 +1,17 @@
    13.4 +**************** INPUT ****************
    13.5 +“Now I see how it happened,” said Spotted Deer. “If you were not very
    13.6 +strong you would have been dead. Yes, if you were not a good war-leader
    13.7 +you would not have come back here. ”We will not talk any more about it.“
    13.8 +
    13.9 +“Well, what did you find?” inquired Running Fox.
   13.10 +**************** WARNINGS ****************
   13.11 +<expected>
   13.12 +  <error>
   13.13 +    <at line="3" column="36"/>
   13.14 +    <text>Wrongspaced quotes?</text>
   13.15 +  </error>
   13.16 +  <error>
   13.17 +    <at line="3" column="72"/>
   13.18 +    <text>Wrongspaced quotes?</text>
   13.19 +  </error>
   13.20 +</expected>
    14.1 --- a/test/compatibility/Makefile.am	Mon Sep 23 21:18:27 2013 +0100
    14.2 +++ b/test/compatibility/Makefile.am	Wed Oct 02 23:51:18 2013 +0100
    14.3 @@ -7,6 +7,7 @@
    14.4  	dashes.tst control-characters.tst unusual-characters.tst \
    14.5  	windows-1252.tst periods.tst long-line.tst unmarked-paragraph.tst \
    14.6  	hebe-jeebies.tst mail-from.tst scannos.tst before-comma.tst \
    14.7 -	before-period.tst double-punctuation.tst genitives.tst embedded-cr.tst
    14.8 +	before-period.tst double-punctuation.tst genitives.tst embedded-cr.tst \
    14.9 +	continuing-quotes.tst
   14.10  
   14.11  dist_pkgdata_DATA=$(TESTS)
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/test/compatibility/continuing-quotes.tst	Wed Oct 02 23:51:18 2013 +0100
    15.3 @@ -0,0 +1,14 @@
    15.4 +**************** INPUT ****************
    15.5 +When Tom had made fast his motorboat, he went to the rowing craft to
    15.6 +see if it was in good condition. He saw a piece of paper on one of the
    15.7 +seats, held down by a little stone. Picking it up he read:
    15.8 +
    15.9 +     "Many thanks for the use of your boat. I had a fine row, and
   15.10 +     I feel better, though I'm as much up a tree as ever. I hope
   15.11 +     to see you again, sometime. If ever you are near Elmwood Hall,
   15.12 +     look me up.
   15.13 +
   15.14 +     "BRUCE BENNINGTON."
   15.15 +
   15.16 +"That was nice of him," remarked Will, as Tom showed him the note.
   15.17 +**************** EXPECTED ****************