1.1 --- a/bookloupe/bookloupe.c Sun Oct 27 17:01:47 2013 +0000
1.2 +++ b/bookloupe/bookloupe.c Wed Oct 16 22:51:29 2013 +0100
1.3 @@ -2568,6 +2568,27 @@
1.4 }
1.5
1.6 /*
1.7 + * str_follows_word:
1.8 + *
1.9 + * Given a position p within a string str, determine whether the text just
1.10 + * before p is word, itself not preceded by an alphabetic character. Roughly
1.11 + * the regular expression (?<=\bword), but with different boundary conditions.
1.12 + */
1.13 +static gboolean str_follows_word(const char *str,const char *p,const char *word)
1.14 +{
1.15 + const gssize len=(gssize)strlen(word);
1.16 + /* Compare offsets: forming p-len before str would be undefined behaviour. */
1.17 + if (p-str<len || !g_str_has_prefix(p-len,word))
1.18 + return FALSE;
1.19 + else if (p-len==str)
1.20 + /* The word starts the string, so there is nothing before it to check. */
1.21 + return TRUE;
1.22 + else
1.23 + /* Using non-alpha as a word boundary. See UAX #29 for a better way. */
1.24 + return !g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(p-len)));
1.25 +}
1.26 +
1.27 +/*
1.28 * check_for_double_punctuation:
1.29 *
1.30 * Look for double punctuation like ,. or ,,
1.31 @@ -2578,11 +2599,14 @@
1.32 * common errors. What to do? Make these cases paranoid?
1.33 * ".," is the most common, so warnings->dotcomma is used
1.34 * to suppress detailed reporting if it occurs often.
1.35 + * Indeed, ".," is so common after "etc" or "&c" that
1.36 + * we don't warn on these cases at all.
1.37 */
1.38 void check_for_double_punctuation(const char *aline,struct warnings *warnings)
1.39 {
1.40 const char *s;
1.41 gunichar c,nc;
1.42 + gboolean is_query;
1.43 nc=g_utf8_get_char(aline);
1.44 for (s=aline;*s;s=g_utf8_next_char(s))
1.45 {
1.46 @@ -2593,36 +2617,30 @@
1.47 g_utf8_strchr(".?!,;:",-1,nc))
1.48 {
1.49 /* followed by punctuation, it's a query, unless . . . */
1.50 - if (c==nc && (c=='.' || c=='?' || c=='!') ||
1.51 - !warnings->dotcomma && c=='.' && nc==',' ||
1.52 - warnings->isFrench && g_str_has_prefix(s,",...") ||
1.53 - warnings->isFrench && g_str_has_prefix(s,"...,") ||
1.54 - warnings->isFrench && g_str_has_prefix(s,";...") ||
1.55 - warnings->isFrench && g_str_has_prefix(s,"...;") ||
1.56 - warnings->isFrench && g_str_has_prefix(s,":...") ||
1.57 - warnings->isFrench && g_str_has_prefix(s,"...:") ||
1.58 - warnings->isFrench && g_str_has_prefix(s,"!...") ||
1.59 - warnings->isFrench && g_str_has_prefix(s,"...!") ||
1.60 - warnings->isFrench && g_str_has_prefix(s,"?...") ||
1.61 - warnings->isFrench && g_str_has_prefix(s,"...?"))
1.62 + is_query=TRUE;
1.63 + if (warnings->isFrench &&
1.64 + (g_str_has_prefix(s,",...") || g_str_has_prefix(s,"...,") ||
1.65 + g_str_has_prefix(s,";...") || g_str_has_prefix(s,"...;") ||
1.66 + g_str_has_prefix(s,":...") || g_str_has_prefix(s,"...:") ||
1.67 + g_str_has_prefix(s,"!...") || g_str_has_prefix(s,"...!") ||
1.68 + g_str_has_prefix(s,"?...") || g_str_has_prefix(s,"...?")))
1.69 {
1.70 - if (warnings->isFrench && g_str_has_prefix(s,",...") ||
1.71 - warnings->isFrench && g_str_has_prefix(s,"...,") ||
1.72 - warnings->isFrench && g_str_has_prefix(s,";...") ||
1.73 - warnings->isFrench && g_str_has_prefix(s,"...;") ||
1.74 - warnings->isFrench && g_str_has_prefix(s,":...") ||
1.75 - warnings->isFrench && g_str_has_prefix(s,"...:") ||
1.76 - warnings->isFrench && g_str_has_prefix(s,"!...") ||
1.77 - warnings->isFrench && g_str_has_prefix(s,"...!") ||
1.78 - warnings->isFrench && g_str_has_prefix(s,"?...") ||
1.79 - warnings->isFrench && g_str_has_prefix(s,"...?"))
1.80 - {
1.81 - s+=4;
1.82 - nc=g_utf8_get_char(g_utf8_next_char(s));
1.83 - }
1.84 - ; /* do nothing for .. !! and ?? which can be legit */
1.85 + s+=4;
1.86 + nc=g_utf8_get_char(g_utf8_next_char(s));
1.87 + is_query=FALSE;
1.88 }
1.89 - else
1.90 + else if (c==nc && (c=='.' || c=='?' || c=='!'))
1.91 + {
1.92 + /* do nothing for .. !! and ?? which can be legit */
1.93 + is_query=FALSE;
1.94 + }
1.95 + else if (c=='.' && nc==',')
1.96 + {
1.97 + if (!warnings->dotcomma || str_follows_word(aline,s,"etc") ||
1.98 + str_follows_word(aline,s,"&c"))
1.99 + is_query=FALSE;
1.100 + }
1.101 + if (is_query)
1.102 {
1.103 if (pswit[ECHO_SWITCH])
1.104 g_print("\n%s\n",aline);
2.1 --- a/test/bookloupe/Makefile.am Sun Oct 27 17:01:47 2013 +0000
2.2 +++ b/test/bookloupe/Makefile.am Wed Oct 16 22:51:29 2013 +0100
2.3 @@ -3,6 +3,6 @@
2.4 runfox-quotes.tst curved-genitives.tst multi-line-illustration.tst \
2.5 emdash.tst config-internal.tst config-default.tst config-user.tst \
2.6 config-override.tst charset-cp1252.tst charset-latin1.tst \
2.7 - footnote-marker.tst unix-lineends.tst os9-lineends.tst
2.8 + footnote-marker.tst unix-lineends.tst os9-lineends.tst dot-comma.tst
2.9
2.10 dist_pkgdata_DATA=$(TESTS)
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
3.2 +++ b/test/bookloupe/dot-comma.tst Wed Oct 16 22:51:29 2013 +0100
3.3 @@ -0,0 +1,6 @@
3.4 +**************** INPUT ****************
3.5 +All men should read the Gospels, &c., in their vulgar tongue.
3.6 +
3.7 +I would have redeemed them from the grip of Sheol, etc., if they had been
3.8 +wise, but being foolish I will bring on them the plagues of death.
3.9 +**************** EXPECTED ****************