bookloupe/bookloupe.c
changeset 142 466f43a12118
parent 103 adc06e9e8470
child 144 d7a97f077f9e
child 147 bb31577536d1
child 150 fd584db1d305
child 151 a485f5dcc2de
child 158 408b56cff0bd
     1.1 --- a/bookloupe/bookloupe.c	Mon Sep 23 21:18:27 2013 +0100
     1.2 +++ b/bookloupe/bookloupe.c	Wed Oct 02 23:51:18 2013 +0100
     1.3 @@ -160,8 +160,7 @@
     1.4      { NULL }
     1.5  };
     1.6  
     1.7 -long cnt_dquot;		/* for overview mode, count of doublequote queries */
     1.8 -long cnt_squot;		/* for overview mode, count of singlequote queries */
     1.9 +long cnt_quote;		/* for overview mode, count of quote queries */
    1.10  long cnt_brack;		/* for overview mode, count of brackets queries */
    1.11  long cnt_bin;		/* for overview mode, count of non-ASCII queries */
    1.12  long cnt_odd;		/* for overview mode, count of odd character queries */
    1.13 @@ -407,10 +406,8 @@
    1.14  	    g_print("    Line-end problems:	     %14ld\n",cnt_lineend);
    1.15  	if (cnt_word)
    1.16  	    g_print("    Common typos:		  %14ld\n",cnt_word);
    1.17 -	if (cnt_dquot)
    1.18 -	    g_print("    Unmatched quotes:	      %14ld\n",cnt_dquot);
    1.19 -	if (cnt_squot)
    1.20 -	    g_print("    Unmatched SingleQuotes:	%14ld\n",cnt_squot);
    1.21 +	if (cnt_quote)
    1.22 +	    g_print("    Unmatched quotes:	      %14ld\n",cnt_quote);
    1.23  	if (cnt_brack)
    1.24  	    g_print("    Unmatched brackets:	    %14ld\n",cnt_brack);
    1.25  	if (cnt_bin)
    1.26 @@ -425,8 +422,8 @@
    1.27  	    g_print("    Possible HTML tags:	    %14ld\n",cnt_html);
    1.28  	g_print("\n");
    1.29  	g_print("    TOTAL QUERIES		  %14ld\n",
    1.30 -	  cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+
    1.31 -	  cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);
    1.32 +	  cnt_quote+cnt_brack+cnt_bin+cnt_odd+cnt_long+cnt_short+cnt_punct+
    1.33 +	  cnt_dash+cnt_word+cnt_html+cnt_lineend);
    1.34      }
    1.35      g_free(running_from);
    1.36      if (usertypo)
    1.37 @@ -453,6 +450,7 @@
    1.38      long spline=0,nspline=0;
    1.39      static struct first_pass_results results={0};
    1.40      gchar *inword;
    1.41 +    QuoteClass qc;
    1.42      lines=g_strsplit(etext,"\n",0);
    1.43      for (j=0;lines[j];j++)
    1.44      {
    1.45 @@ -507,9 +505,16 @@
    1.46  		results.binlen++;
    1.47  	    if (g_unichar_isalpha(g_utf8_get_char(s)))
    1.48  		results.alphalen++;
    1.49 -	    if (s>lines[j] && g_utf8_get_char(s)==CHAR_DQUOTE &&
    1.50 -	      isalpha(g_utf8_get_char(g_utf8_prev_char(s))))
    1.51 -		results.endquote_count++;
    1.52 +	    if (s>lines[j])
    1.53 +	    {
    1.54 +		if (CHAR_IS_DQUOTE(g_utf8_get_char(s)))
    1.55 +		    qc=QUOTE_CLASS(g_utf8_get_char(s));
    1.56 +		else
    1.57 +		    qc=INVALID_QUOTE;
    1.58 +		if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) &&
    1.59 +		  isalpha(g_utf8_get_char(g_utf8_prev_char(s))))
    1.60 +		    results.endquote_count++;
    1.61 +	    }
    1.62  	}
    1.63  	if (llen>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE && lastblen>2 &&
    1.64  	  lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)
    1.65 @@ -788,7 +793,7 @@
    1.66   *
    1.67   * Returns: TRUE if the line is empty.
    1.68   */
    1.69 -gboolean analyse_quotes(const char *aline,struct counters *counters)
    1.70 +gboolean analyse_quotes(const char *aline,int linecnt,struct counters *counters)
    1.71  {
    1.72      int guessquote=0;
    1.73      /* assume the line is empty until proven otherwise */
    1.74 @@ -796,23 +801,24 @@
    1.75      const char *s=aline,*sprev,*snext;
    1.76      gunichar c;
    1.77      sprev=NULL;
    1.78 +    GError *tmp_err=NULL;
    1.79      while (*s)
    1.80      {
    1.81  	snext=g_utf8_next_char(s);
    1.82  	c=g_utf8_get_char(s);
    1.83 -	if (c==CHAR_DQUOTE)
    1.84 -	    counters->quot++;
    1.85 -	if (CHAR_IS_SQUOTE(c))
    1.86 +	if (CHAR_IS_DQUOTE(c))
    1.87 +	    (void)count_quote(counters,c,QUOTE_CLASS(c),&tmp_err);
    1.88 +	else if (CHAR_IS_SQUOTE(c) && pswit[SQUOTE_SWITCH])
    1.89  	{
    1.90  	    if (s==aline)
    1.91  	    {
    1.92  		/*
    1.93 -		 * At start of line, it can only be an openquote.
    1.94 +		 * At start of line, it can only be a quotation mark.
    1.95  		 * Hardcode a very common exception!
    1.96  		 */
    1.97  		if (!g_str_has_prefix(snext,"tis") &&
    1.98  		  !g_str_has_prefix(snext,"Tis"))
    1.99 -		    increment_matching(counters,c,TRUE);
   1.100 +		    (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err);
   1.101  	    }
   1.102  	    else if (g_unichar_isalpha(g_utf8_get_char(sprev)) &&
   1.103  	      g_unichar_isalpha(g_utf8_get_char(snext)))
   1.104 @@ -822,15 +828,20 @@
   1.105  	    else if (c==CHAR_OPEN_SQUOTE || c==CHAR_LS_QUOTE ||
   1.106  	      g_unichar_isalpha(g_utf8_get_char(snext)))
   1.107  	    {
   1.108 -		/* it damwell better BE an openquote */
   1.109 +		/* certainly looks like a quotation mark */
   1.110  		if (!g_str_has_prefix(snext,"tis") &&
   1.111  		  !g_str_has_prefix(snext,"Tis"))
   1.112  		    /* hardcode a very common exception! */
   1.113 -		    increment_matching(counters,c,TRUE);
   1.114 +		{
   1.115 +		    if (strchr(".?!,;:",g_utf8_get_char(sprev)))
   1.116 +			(void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err);
   1.117 +		    else
   1.118 +			(void)count_quote(counters,c,OPENING_QUOTE,&tmp_err);
   1.119 +		}
   1.120  	    }
   1.121  	    else
   1.122  	    {
   1.123 -		/* now - is it a closequote? */
   1.124 +		/* now - is it a quotation mark? */
   1.125  		guessquote=0;   /* accumulate clues */
   1.126  		if (g_unichar_isalpha(g_utf8_get_char(sprev)))
   1.127  		{
   1.128 @@ -844,25 +855,31 @@
   1.129  			    /* bonus marks! */
   1.130  			    guessquote-=2;
   1.131  		    }
   1.132 +		    if (innermost_quote_matches(counters,c))
   1.133 +			/*
   1.134 +			 * Give it the benefit of some doubt,
   1.135 +			 * if a squote is already open.
   1.136 +			 */
   1.137 +			guessquote++;
   1.138 +		    else
   1.139 +			guessquote--;
   1.140 +		    if (guessquote>=0)
   1.141 +			(void)count_quote(counters,c,CLOSING_QUOTE,&tmp_err);
   1.142  		}
   1.143 -		/* it doesn't have a letter either side */
   1.144 -		else if (strchr(".?!,;:",g_utf8_get_char(sprev)) &&
   1.145 -		  strchr(".?!,;: ",g_utf8_get_char(snext)))
   1.146 -		    guessquote+=8; /* looks like a closequote */
   1.147  		else
   1.148 -		    guessquote++;
   1.149 -		if (matching_difference(counters,CHAR_SQUOTE)>0)
   1.150 -		    /*
   1.151 -		     * Give it the benefit of some doubt,
   1.152 -		     * if a squote is already open.
   1.153 -		     */
   1.154 -		    guessquote++;
   1.155 -		else
   1.156 -		    guessquote--;
   1.157 -		if (guessquote>=0)
   1.158 -		    increment_matching(counters,c,FALSE);
   1.159 +		    /* no adjacent letter - it must be a quote of some kind */
   1.160 +		    (void)count_quote(counters,c,NEUTRAL_QUOTE,&tmp_err);
   1.161  	    }
   1.162  	}
   1.163 +	if (tmp_err)
   1.164 +	{
   1.165 +	    if (pswit[ECHO_SWITCH])
   1.166 +		g_print("\n%s\n",aline);
   1.167 +	    if (!pswit[OVERVIEW_SWITCH])
   1.168 +		g_print("    Line %ld column %ld - %s\n",
   1.169 +		  linecnt,g_utf8_pointer_to_offset(aline,s)+1,tmp_err->message);
   1.170 +	    g_clear_error(&tmp_err);
   1.171 +	}
   1.172  	if (c!=CHAR_SPACE && c!='-' && c!='.' && c!=CHAR_ASTERISK &&
   1.173  	  c!='\r' && c!='\n')
   1.174  	    isemptyline=FALSE;  /* ignore lines like  *  *  *  as spacers */
   1.175 @@ -1779,6 +1796,7 @@
   1.176      gboolean isacro,isellipsis;
   1.177      const char *s;
   1.178      gunichar c,nc,pc,n2c;
   1.179 +    int parity;
   1.180      c=g_utf8_get_char(aline);
   1.181      nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
   1.182      for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
   1.183 @@ -1917,7 +1935,7 @@
   1.184  	c=nc;
   1.185  	nc=g_utf8_get_char(g_utf8_next_char(s));
   1.186  	/* for each character in the line after the first */
   1.187 -	if (c==CHAR_DQUOTE)
   1.188 +	if (CHAR_IS_DQUOTE(c))
   1.189  	{
   1.190  	    if (!g_utf8_strchr(" _-.'`,;:!/([{?}])",-1,pc) &&
   1.191  	      !g_utf8_strchr(" _-.'`,;:!/([{?}])",-1,nc) && nc ||
   1.192 @@ -1939,10 +1957,18 @@
   1.193      {
   1.194  	c=nc;
   1.195  	nc=g_utf8_get_char(g_utf8_next_char(s));
   1.196 -	if (c==CHAR_DQUOTE)
   1.197 +	if (CHAR_IS_DQUOTE(c))
   1.198  	{
   1.199 -	    parities->dquote=!parities->dquote;
   1.200 -	    if (!parities->dquote)
   1.201 +	    if (c==CHAR_DQUOTE)
   1.202 +	    {
   1.203 +		parities->dquote=!parities->dquote;
   1.204 +		parity=parities->dquote;
   1.205 +	    }
   1.206 +	    else if (c==CHAR_LD_QUOTE)
   1.207 +		parity=1;
   1.208 +	    else
   1.209 +		parity=0;
   1.210 +	    if (!parity)
   1.211  	    {
   1.212  		/* parity even */
   1.213  		if (!g_utf8_strchr("_-.'`/,;:!?)]} ",-1,nc))
   1.214 @@ -1975,7 +2001,8 @@
   1.215  	    }
   1.216  	}
   1.217      }
   1.218 -    if (g_utf8_get_char(aline)==CHAR_DQUOTE)
   1.219 +    c=g_utf8_get_char(aline);
   1.220 +    if (CHAR_IS_DQUOTE(c))
   1.221      {
   1.222  	if (g_utf8_strchr(",;:!?)]} ",-1,
   1.223  	  g_utf8_get_char(g_utf8_next_char(aline))))
   1.224 @@ -2200,7 +2227,7 @@
   1.225  	s=g_utf8_prev_char(aline+lbytes);
   1.226  	c1=g_utf8_get_char(s);
   1.227  	c2=g_utf8_get_char(g_utf8_prev_char(s));
   1.228 -	if ((c1==CHAR_DQUOTE || CHAR_IS_SQUOTE(c1)) && c2==CHAR_SPACE)
   1.229 +	if ((CHAR_IS_DQUOTE(c1) || CHAR_IS_SQUOTE(c1)) && c2==CHAR_SPACE)
   1.230  	{
   1.231  	    if (pswit[ECHO_SWITCH])
   1.232  		g_print("\n%s\n",aline);
   1.233 @@ -2285,15 +2312,17 @@
   1.234  {
   1.235      const char *s;
   1.236      gunichar c,nc,pc;
   1.237 +    QuoteClass qc;
   1.238      c=g_utf8_get_char(aline);
   1.239      nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
   1.240      for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
   1.241      {
   1.242  	pc=c;
   1.243  	c=nc;
   1.244 +	qc=CHAR_IS_DQUOTE(c)?QUOTE_CLASS(c):INVALID_QUOTE;
   1.245  	nc=g_utf8_get_char(g_utf8_next_char(s));
   1.246  	/* for each character in the line except 1st */
   1.247 -	if (c==CHAR_DQUOTE && isalpha(pc))
   1.248 +	if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) && isalpha(pc))
   1.249  	{
   1.250  	    if (pswit[ECHO_SWITCH])
   1.251  		g_print("\n%s\n",aline);
   1.252 @@ -2396,6 +2425,7 @@
   1.253      gboolean letter_on_line=FALSE;
   1.254      const char *s;
   1.255      gunichar c;
   1.256 +    gboolean closing_quote;
   1.257      for (s=prevline;*s;s=g_utf8_next_char(s))
   1.258  	if (g_unichar_isalpha(g_utf8_get_char(s)))
   1.259  	{
   1.260 @@ -2417,7 +2447,11 @@
   1.261  	{
   1.262  	    s=g_utf8_prev_char(s);
   1.263  	    c=g_utf8_get_char(s);
   1.264 -	} while (CHAR_IS_CLOSING_QUOTE(c) && c>CHAR_SPACE && s>prevline);
   1.265 +	    if (QUOTE_CLASS(c)==CLOSING_QUOTE || QUOTE_CLASS(c)==NEUTRAL_QUOTE)
   1.266 +		closing_quote=TRUE;
   1.267 +	    else
   1.268 +		closing_quote=FALSE;
   1.269 +	} while (closing_quote && s>prevline);
   1.270  	for (;s>prevline;s=g_utf8_prev_char(s))
   1.271  	{
   1.272  	    if (g_unichar_isalpha(g_utf8_get_char(s)))
   1.273 @@ -2548,7 +2582,7 @@
   1.274  	}
   1.275  	checked_linecnt++;
   1.276  	print_pending(aline,parastart,&pending);
   1.277 -	isemptyline=analyse_quotes(aline,&counters);
   1.278 +	isemptyline=analyse_quotes(aline,linecnt,&counters);
   1.279  	if (isnewpara && !isemptyline)
   1.280  	{
   1.281  	    /* This line is the start of a new paragraph. */