# HG changeset patch # User ali # Date 1380058082 -3600 # Node ID f805130deb6fded128f6705eedbbf1e7d786c723 # Parent 9fb13a5dde3b18e7e14910cf062672cef8995f6d Fix bug #11: Test for balanced "slanted" UTF-8 quotation marks 8220/8221 diff -r 9fb13a5dde3b -r f805130deb6f bl/Makefile.am --- a/bl/Makefile.am Mon Sep 23 21:18:27 2013 +0100 +++ b/bl/Makefile.am Tue Sep 24 22:28:02 2013 +0100 @@ -4,4 +4,4 @@ noinst_LTLIBRARIES=libbl.la libbl_la_SOURCES=bl.h textfileutils.c textfileutils.h spawn.c spawn.h \ - path.c path.h mkdtemp.c mkdtemp.h print.c print.h + path.c path.h mkdtemp.c mkdtemp.h print.c print.h utf8.c utf8.h diff -r 9fb13a5dde3b -r f805130deb6f bl/bl.h --- a/bl/bl.h Mon Sep 23 21:18:27 2013 +0100 +++ b/bl/bl.h Tue Sep 24 22:28:02 2013 +0100 @@ -3,3 +3,4 @@ #include #include #include +#include diff -r 9fb13a5dde3b -r f805130deb6f bl/utf8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bl/utf8.c Tue Sep 24 22:28:02 2013 +0100 @@ -0,0 +1,24 @@ +#include +#include +#include +#include + +/* + * Creates a new string length characters long filled with fill_char. + * The returned string should be freed with g_free() when no longer needed. 
+ */ +gchar *utf8_strnfill(gsize length,gunichar fill_char) +{ + int n,i; + gchar *s; + char utf8[6]; + n=g_unichar_to_utf8(fill_char,utf8); + s=g_new(gchar,length*n+1); + if (n==1) + memset(s,utf8[0],length); + else + for(i=0;iquot++; - if (CHAR_IS_SQUOTE(c)) + increment_matching(counters,c,!matching_difference(counters,c)); + else if (CHAR_IS_DQUOTE(c)) + increment_matching(counters,c,!CHAR_IS_CLOSING_QUOTE(c)); + else if (CHAR_IS_SQUOTE(c)) { if (s==aline) { diff -r 9fb13a5dde3b -r f805130deb6f bookloupe/bookloupe.h --- a/bookloupe/bookloupe.h Mon Sep 23 21:18:27 2013 +0100 +++ b/bookloupe/bookloupe.h Tue Sep 24 22:28:02 2013 +0100 @@ -24,14 +24,20 @@ #define CHAR_LS_QUOTE 0x2018 #define CHAR_RS_QUOTE 0x2019 +#define CHAR_LD_QUOTE 0x201C +#define CHAR_RD_QUOTE 0x201D #define CHAR_IS_SQUOTE(c) ((c)==CHAR_SQUOTE || (c)==CHAR_OPEN_SQUOTE || \ (c)==CHAR_LS_QUOTE || (c)==CHAR_RS_QUOTE) +#define CHAR_IS_DQUOTE(c) ((c)==CHAR_DQUOTE || (c)==CHAR_LD_QUOTE || \ + (c)==CHAR_RD_QUOTE) + #define CHAR_IS_APOSTROPHE(c) ((c)==CHAR_SQUOTE || (c)==CHAR_RS_QUOTE) #define CHAR_IS_CLOSING_QUOTE(c) \ - ((c)==CHAR_DQUOTE || (c)==CHAR_SQUOTE || (c)==CHAR_RS_QUOTE) + ((c)==CHAR_DQUOTE || (c)==CHAR_RD_QUOTE || (c)==CHAR_SQUOTE || \ + (c)==CHAR_RS_QUOTE) /* longest and shortest normal PG line lengths */ #define LONGEST_PG_LINE 75 diff -r 9fb13a5dde3b -r f805130deb6f bookloupe/counters.c --- a/bookloupe/counters.c Mon Sep 23 21:18:27 2013 +0100 +++ b/bookloupe/counters.c Tue Sep 24 22:28:02 2013 +0100 @@ -45,11 +45,13 @@ return GINT_TO_POINTER((gint)CHAR_SQUOTE); else if (ch==CHAR_LS_QUOTE || ch==CHAR_RS_QUOTE) return GINT_TO_POINTER((gint)CHAR_LS_QUOTE); + else if (ch==CHAR_LD_QUOTE || ch==CHAR_RD_QUOTE) + return GINT_TO_POINTER((gint)CHAR_LD_QUOTE); + else if (ch==CHAR_DQUOTE) + return GINT_TO_POINTER((gint)ch); else if (ch<0x4000 || ch-0x4000>=NO_SPECIAL_COUNTERS) - { g_warning("Matching pair not found for U+%04" G_GINT32_MODIFIER "X",ch); - return GINT_TO_POINTER((gint)ch); - } + 
return GINT_TO_POINTER((gint)ch); } void increment_matching(struct counters *counters,gunichar ch,gboolean open) diff -r 9fb13a5dde3b -r f805130deb6f bookloupe/counters.h --- a/bookloupe/counters.h Mon Sep 23 21:18:27 2013 +0100 +++ b/bookloupe/counters.h Tue Sep 24 22:28:02 2013 +0100 @@ -11,7 +11,6 @@ struct counters { GTree *matching; - long quot; int c_unders; }; diff -r 9fb13a5dde3b -r f805130deb6f bookloupe/pending.c --- a/bookloupe/pending.c Mon Sep 23 21:18:27 2013 +0100 +++ b/bookloupe/pending.c Tue Sep 24 22:28:02 2013 +0100 @@ -1,6 +1,7 @@ #include #include #include +#include #include "bookloupe.h" #include "pending.h" @@ -15,20 +16,9 @@ void print_pending(const char *aline,const char *parastart, struct pending *pending) { - const char *s; - gunichar c; if (aline) - { - s=aline; - while (*s==' ') - s++; - c=g_utf8_get_char(s); - } - else - { - s=NULL; - c='\0'; - } + while (g_unichar_isspace(g_utf8_get_char(aline))) + aline=g_utf8_next_char(aline); if (pending->illustration.warning_text) { if (aline) @@ -54,7 +44,8 @@ } if (pending->dquote) { - if (c!=CHAR_DQUOTE || pswit[QPARA_SWITCH]) + if (!pending->continuing_quote || !aline || + !g_str_has_prefix(aline,pending->continuing_quote)) { if (!pswit[OVERVIEW_SWITCH]) { @@ -70,7 +61,8 @@ } if (pending->squote) { - if (!CHAR_IS_SQUOTE(c) || pswit[QPARA_SWITCH] || pending->squot) + if (!pending->continuing_quote || + !g_str_has_prefix(aline,pending->continuing_quote)) { if (!pswit[OVERVIEW_SWITCH]) { @@ -84,6 +76,8 @@ g_free(pending->squote); pending->squote=NULL; } + g_free(pending->continuing_quote); + pending->continuing_quote=NULL; if (pending->rbrack) { if (!pswit[OVERVIEW_SWITCH]) @@ -159,34 +153,58 @@ * quotes on _every_ paragraph, whether the next begins with a * quote or not. 
*/ -void check_for_mismatched_quotes(const struct counters *counters, +void check_for_mismatched_quotes(struct counters *counters, struct pending *pending) { - int squote_straight,squote_curved,difference; - if (counters->quot%2) + int quote_straight,quote_curved,difference; + quote_straight=matching_difference(counters,CHAR_DQUOTE); + quote_curved=matching_difference(counters,CHAR_LD_QUOTE); + if (quote_straight || quote_curved) + { pending->dquote= g_strdup_printf(" Line %ld - Mismatched quotes",linecnt); + if (pswit[QPARA_SWITCH] || quote_curved && quote_curved!=1 || + quote_straight && quote_curved) + /* + * Flag it to be noted regardless of the + * first line of the next para. + */ + pending->continuing_quote=NULL; + else if (quote_straight) + pending->continuing_quote=utf8_strnfill(quote_straight,CHAR_DQUOTE); + else + pending->continuing_quote=utf8_strnfill(quote_curved,CHAR_LD_QUOTE); + } if (pswit[SQUOTE_SWITCH]) { if (matching_count(counters,CHAR_SQUOTE,TRUE)) - squote_straight=matching_difference(counters,CHAR_SQUOTE); + quote_straight=matching_difference(counters,CHAR_SQUOTE); else - squote_straight=0; + quote_straight=0; if (matching_count(counters,CHAR_LS_QUOTE,TRUE)) - squote_curved=matching_difference(counters,CHAR_LS_QUOTE); + quote_curved=matching_difference(counters,CHAR_LS_QUOTE); else - squote_curved=0; - if (squote_straight || squote_curved) + quote_curved=0; + if (quote_straight || quote_curved) pending->squote= g_strdup_printf(" Line %ld - Mismatched singlequotes?", linecnt); - if (squote_straight && squote_straight!=1 || - squote_curved && squote_curved!=1) + if (pending->continuing_quote) + { /* * Flag it to be noted regardless of the - * first char of the next para. + * first line of the next para. 
*/ - pending->squot=1; + g_free(pending->continuing_quote); + pending->continuing_quote=NULL; + } + if (pswit[QPARA_SWITCH] || quote_straight && quote_straight!=1 || + quote_curved && quote_curved!=1 || quote_straight && quote_curved) + pending->continuing_quote=NULL; + else if (quote_straight) + pending->continuing_quote=utf8_strnfill(quote_straight,CHAR_SQUOTE); + else + pending->continuing_quote=utf8_strnfill(quote_curved,CHAR_LS_QUOTE); } difference=matching_difference(counters,COUNTER_ILLUSTRATION); if (difference) diff -r 9fb13a5dde3b -r f805130deb6f bookloupe/pending.h --- a/bookloupe/pending.h Mon Sep 23 21:18:27 2013 +0100 +++ b/bookloupe/pending.h Tue Sep 24 22:28:02 2013 +0100 @@ -10,14 +10,14 @@ struct pending { char *dquote,*squote,*rbrack,*sbrack,*cbrack,*unders; - long squot; + char *continuing_quote; struct pending_warning illustration; }; void print_pending(const char *aline,const char *parastart, struct pending *pending); void reset_pending(struct pending *pending); -void check_for_mismatched_quotes(const struct counters *counters, +void check_for_mismatched_quotes(struct counters *counters, struct pending *pending); #endif /* PENDING_H */ diff -r 9fb13a5dde3b -r f805130deb6f test/bookloupe/Makefile.am --- a/test/bookloupe/Makefile.am Mon Sep 23 21:18:27 2013 +0100 +++ b/test/bookloupe/Makefile.am Tue Sep 24 22:28:02 2013 +0100 @@ -1,5 +1,5 @@ TESTS_ENVIRONMENT=BOOKLOUPE=../../bookloupe/bookloupe ../harness/loupe-test -TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst \ +TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst curved-quotes.tst \ curved-genitives.tst multi-line-illustration.tst dist_pkgdata_DATA=$(TESTS) diff -r 9fb13a5dde3b -r f805130deb6f test/bookloupe/curved-quotes.tst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/bookloupe/curved-quotes.tst Tue Sep 24 22:28:02 2013 +0100 @@ -0,0 +1,35 @@ +**************** INPUT **************** +When Tom had made fast his motorboat, he went to the rowing craft to +see if 
it was in good condition. He saw a piece of paper on one of the +seats, held down by a little stone. Picking it up he read: + + “Many thanks for the use of your boat. I had a fine row, and + I feel better, though I’m as much up a tree as ever. I hope + to see you again, sometime. If ever you are near Elmwood Hall, + look me up. + + “BRUCE BENNINGTON.” + +That was nice of him,” remarked Will, as Tom showed him the note. + +“And he didn’t damage your boat any," spoke Dick. + +"No, he knows how to handle ’em--he rows on the Elmwood Hall crew,” said +Tom. “Well, so long, fellows. I’m going for a long run to-morrow, if +you’d like to come.” + +“Sure! they chorused. +**************** WARNINGS **************** + + + + + + + Mismatched quotes + + + + Wrongspaced quotes? + + diff -r 9fb13a5dde3b -r f805130deb6f test/compatibility/Makefile.am --- a/test/compatibility/Makefile.am Mon Sep 23 21:18:27 2013 +0100 +++ b/test/compatibility/Makefile.am Tue Sep 24 22:28:02 2013 +0100 @@ -7,6 +7,7 @@ dashes.tst control-characters.tst unusual-characters.tst \ windows-1252.tst periods.tst long-line.tst unmarked-paragraph.tst \ hebe-jeebies.tst mail-from.tst scannos.tst before-comma.tst \ - before-period.tst double-punctuation.tst genitives.tst embedded-cr.tst + before-period.tst double-punctuation.tst genitives.tst embedded-cr.tst \ + continuing-quotes.tst dist_pkgdata_DATA=$(TESTS) diff -r 9fb13a5dde3b -r f805130deb6f test/compatibility/continuing-quotes.tst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compatibility/continuing-quotes.tst Tue Sep 24 22:28:02 2013 +0100 @@ -0,0 +1,14 @@ +**************** INPUT **************** +When Tom had made fast his motorboat, he went to the rowing craft to +see if it was in good condition. He saw a piece of paper on one of the +seats, held down by a little stone. Picking it up he read: + + "Many thanks for the use of your boat. I had a fine row, and + I feel better, though I'm as much up a tree as ever. 
I hope + to see you again, sometime. If ever you are near Elmwood Hall, + look me up. + + "BRUCE BENNINGTON." + +"That was nice of him," remarked Will, as Tom showed him the note. +**************** EXPECTED ****************