Fix bug #28: Don't report ., as double punctuation after "etc" or "&c"
author ali <ali@juiblex.co.uk>
Wed Oct 16 22:51:29 2013 +0100 (2013-10-16)
changeset 104 70cc629ec1e0
parent 103 d22d8cd4f628
child 105 2d48e8cdda24
Fix bug #28: Don't report ., as double punctuation after "etc" or "&c"
bookloupe/bookloupe.c
test/bookloupe/Makefile.am
test/bookloupe/dot-comma.tst
     1.1 --- a/bookloupe/bookloupe.c	Sun Oct 27 17:01:47 2013 +0000
     1.2 +++ b/bookloupe/bookloupe.c	Wed Oct 16 22:51:29 2013 +0100
     1.3 @@ -2568,6 +2568,27 @@
     1.4  }
     1.5  
     1.6  /*
     1.7 + * str_follows_word:
     1.8 + *
     1.9 + * Given a position p within a string str, determine whether it follows the
    1.10 + * given word. This is roughly equivalent to the regular expression (?<=\bword)
    1.11 + * but has different boundary conditions.
    1.12 + */
    1.13 +static gboolean str_follows_word(const char *str,const char *p,const char *word)
    1.14 +{
    1.15 +    int len=strlen(word);
    1.16 +    if (p-len<str)
    1.17 +	return FALSE;
    1.18 +    else if (!g_str_has_prefix(p-len,word))
    1.19 +	return FALSE;
    1.20 +    else if (p-len==str)
    1.21 +	return TRUE;
    1.22 +    else
    1.23 +	/* Using non-alpha as a word boundary. See UAX #29 for a better way. */
    1.24 +	return !g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(p-len)));
    1.25 +}
    1.26 +
    1.27 +/*
    1.28   * check_for_double_punctuation:
    1.29   *
    1.30   * Look for double punctuation like ,. or ,,
    1.31 @@ -2578,11 +2599,14 @@
    1.32   * common errors. What to do? Make these cases paranoid?
    1.33   * ".," is the most common, so warnings->dotcomma is used
    1.34   * to suppress detailed reporting if it occurs often.
    1.35 + * Indeed, ".," is so common after "etc" or "&c" that
    1.36 + * we don't warn on these cases at all.
    1.37   */
    1.38  void check_for_double_punctuation(const char *aline,struct warnings *warnings)
    1.39  {
    1.40      const char *s;
    1.41      gunichar c,nc;
    1.42 +    gboolean is_query;
    1.43      nc=g_utf8_get_char(aline);
    1.44      for (s=aline;*s;s=g_utf8_next_char(s))
    1.45      {
    1.46 @@ -2593,36 +2617,30 @@
    1.47  	  g_utf8_strchr(".?!,;:",-1,nc))
    1.48  	{
    1.49  	    /* followed by punctuation, it's a query, unless . . . */
    1.50 -	    if (c==nc && (c=='.' || c=='?' || c=='!') ||
    1.51 -	      !warnings->dotcomma && c=='.' && nc==',' ||
    1.52 -	      warnings->isFrench && g_str_has_prefix(s,",...") ||
    1.53 -	      warnings->isFrench && g_str_has_prefix(s,"...,") ||
    1.54 -	      warnings->isFrench && g_str_has_prefix(s,";...") ||
    1.55 -	      warnings->isFrench && g_str_has_prefix(s,"...;") ||
    1.56 -	      warnings->isFrench && g_str_has_prefix(s,":...") ||
    1.57 -	      warnings->isFrench && g_str_has_prefix(s,"...:") ||
    1.58 -	      warnings->isFrench && g_str_has_prefix(s,"!...") ||
    1.59 -	      warnings->isFrench && g_str_has_prefix(s,"...!") ||
    1.60 -	      warnings->isFrench && g_str_has_prefix(s,"?...") ||
    1.61 -	      warnings->isFrench && g_str_has_prefix(s,"...?"))
    1.62 +	    is_query=TRUE;
    1.63 +	    if (warnings->isFrench &&
    1.64 +	      (g_str_has_prefix(s,",...") || g_str_has_prefix(s,"...,") ||
    1.65 +	       g_str_has_prefix(s,";...") || g_str_has_prefix(s,"...;") ||
    1.66 +	       g_str_has_prefix(s,":...") || g_str_has_prefix(s,"...:") ||
    1.67 +	       g_str_has_prefix(s,"!...") || g_str_has_prefix(s,"...!") ||
    1.68 +	       g_str_has_prefix(s,"?...") || g_str_has_prefix(s,"...?")))
    1.69  	    {
    1.70 -		if (warnings->isFrench && g_str_has_prefix(s,",...") ||
    1.71 -		  warnings->isFrench && g_str_has_prefix(s,"...,") ||
    1.72 -		  warnings->isFrench && g_str_has_prefix(s,";...") ||
    1.73 -		  warnings->isFrench && g_str_has_prefix(s,"...;") ||
    1.74 -		  warnings->isFrench && g_str_has_prefix(s,":...") ||
    1.75 -		  warnings->isFrench && g_str_has_prefix(s,"...:") ||
    1.76 -		  warnings->isFrench && g_str_has_prefix(s,"!...") ||
    1.77 -		  warnings->isFrench && g_str_has_prefix(s,"...!") ||
    1.78 -		  warnings->isFrench && g_str_has_prefix(s,"?...") ||
    1.79 -		  warnings->isFrench && g_str_has_prefix(s,"...?"))
    1.80 -		{
    1.81 -		    s+=4;
    1.82 -		    nc=g_utf8_get_char(g_utf8_next_char(s));
    1.83 -		}
    1.84 -		; /* do nothing for .. !! and ?? which can be legit */
    1.85 +		s+=4;
    1.86 +		nc=g_utf8_get_char(g_utf8_next_char(s));
    1.87 +		is_query=FALSE;
    1.88  	    }
    1.89 -	    else
    1.90 +	    else if (c==nc && (c=='.' || c=='?' || c=='!'))
    1.91 +	    {
    1.92 +		/* do nothing for .. !! and ?? which can be legit */
    1.93 +		is_query=FALSE;
    1.94 +	    }
    1.95 +	    else if (c=='.' && nc==',')
    1.96 +	    {
    1.97 +		if (!warnings->dotcomma || str_follows_word(aline,s,"etc") || 
    1.98 +		  str_follows_word(aline,s,"&c"))
    1.99 +		    is_query=FALSE;
   1.100 +	    }
   1.101 +	    if (is_query)
   1.102  	    {
   1.103  		if (pswit[ECHO_SWITCH])
   1.104  		    g_print("\n%s\n",aline);
     2.1 --- a/test/bookloupe/Makefile.am	Sun Oct 27 17:01:47 2013 +0000
     2.2 +++ b/test/bookloupe/Makefile.am	Wed Oct 16 22:51:29 2013 +0100
     2.3 @@ -3,6 +3,6 @@
     2.4  	runfox-quotes.tst curved-genitives.tst multi-line-illustration.tst \
     2.5  	emdash.tst config-internal.tst config-default.tst config-user.tst \
     2.6  	config-override.tst charset-cp1252.tst charset-latin1.tst \
     2.7 -	footnote-marker.tst unix-lineends.tst os9-lineends.tst
     2.8 +	footnote-marker.tst unix-lineends.tst os9-lineends.tst dot-comma.tst
     2.9  
    2.10  dist_pkgdata_DATA=$(TESTS)
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/test/bookloupe/dot-comma.tst	Wed Oct 16 22:51:29 2013 +0100
     3.3 @@ -0,0 +1,6 @@
     3.4 +**************** INPUT ****************
     3.5 +All men should read the Gospels, &c., in their vulgar tongue.
     3.6 +
     3.7 +I would have redeemed them from the grip of Sheol, etc., if they had been
     3.8 +wise, but being foolish I will bring on them the plagues of death.
     3.9 +**************** EXPECTED ****************