# HG changeset patch # User ali # Date 1379803218 -3600 # Node ID 7a62c77a0dbe1713a9f01fa7f4222688e0044875 # Parent 9a5f6d17e86e4993f8e1f56348f99aff6fe3cf9e Fix bug #6: BL treats a slanted apostrophe ? as a word separator, not as a contraction or possessive diff -r 9a5f6d17e86e -r 7a62c77a0dbe bookloupe/Makefile.am --- a/bookloupe/Makefile.am Tue Sep 17 20:55:57 2013 +0100 +++ b/bookloupe/Makefile.am Sat Sep 21 23:40:18 2013 +0100 @@ -1,5 +1,6 @@ INCLUDES=-I$(top_srcdir) bin_PROGRAMS=bookloupe +bookloupe_SOURCES=bookloupe.c bookloupe.h counters.c counters.h pkgdata_DATA=bookloupe.typ AM_CFLAGS=$(GLIB_CFLAGS) LIBS=$(GLIB_LIBS) diff -r 9a5f6d17e86e -r 7a62c77a0dbe bookloupe/bookloupe.c --- a/bookloupe/bookloupe.c Tue Sep 17 20:55:57 2013 +0100 +++ b/bookloupe/bookloupe.c Sat Sep 21 23:40:18 2013 +0100 @@ -27,6 +27,8 @@ #endif #include #include +#include "bookloupe.h" +#include "counters.h" #include "HTMLentities.h" gchar *prevline; @@ -123,50 +125,6 @@ "among", "those", "into", "whom", "having", "thence", "" }; -/* special characters */ -#define CHAR_SPACE 32 -#define CHAR_TAB 9 -#define CHAR_LF 10 -#define CHAR_CR 13 -#define CHAR_DQUOTE 34 -#define CHAR_SQUOTE 39 -#define CHAR_OPEN_SQUOTE 96 -#define CHAR_TILDE 126 -#define CHAR_ASTERISK 42 -#define CHAR_FORESLASH 47 -#define CHAR_CARAT 94 - -#define CHAR_UNDERSCORE '_' -#define CHAR_OPEN_CBRACK '{' -#define CHAR_CLOSE_CBRACK '}' -#define CHAR_OPEN_RBRACK '(' -#define CHAR_CLOSE_RBRACK ')' -#define CHAR_OPEN_SBRACK '[' -#define CHAR_CLOSE_SBRACK ']' - -/* longest and shortest normal PG line lengths */ -#define LONGEST_PG_LINE 75 -#define WAY_TOO_LONG 80 -#define SHORTEST_PG_LINE 55 - -enum { - ECHO_SWITCH, - SQUOTE_SWITCH, - TYPO_SWITCH, - QPARA_SWITCH, - PARANOID_SWITCH, - LINE_END_SWITCH, - OVERVIEW_SWITCH, - STDOUT_SWITCH, - HEADER_SWITCH, - WEB_SWITCH, - VERBOSE_SWITCH, - MARKUP_SWITCH, - USERTYPO_SWITCH, - DP_SWITCH, - SWITNO -}; - gboolean pswit[SWITNO]; /* program switches */ static GOptionEntry options[]={ 
@@ -242,40 +200,6 @@ UINT saved_cp; #endif -struct first_pass_results { - long firstline,astline; - long footerline,totlen,binlen,alphalen,endquote_count,shortline,dotcomma; - long fslashline,hyphens,longline,verylongline,htmcount,standalone_digit; - long spacedash,emdash,space_emdash,non_PG_space_emdash,PG_space_emdash; - int Dutchcount,Frenchcount; -}; - -struct warnings { - int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen; - int endquote; - gboolean isDutch,isFrench; -}; - -struct counters { - long quot; - int c_unders,c_brack,s_brack,r_brack; - int open_single_quote,close_single_quote; -}; - -struct line_properties { - unsigned int len,blen; - gunichar start; -}; - -struct parities { - int dquote,squote; -}; - -struct pending { - char *dquote,*squote,*rbrack,*sbrack,*cbrack,*unders; - long squot; -}; - void parse_options(int *argc,char ***argv) { GError *err=NULL; @@ -877,7 +801,7 @@ c=g_utf8_get_char(s); if (c==CHAR_DQUOTE) counters->quot++; - if (c==CHAR_SQUOTE || c==CHAR_OPEN_SQUOTE) + if (CHAR_IS_SQUOTE(c)) { if (s==aline) { @@ -887,21 +811,21 @@ */ if (!g_str_has_prefix(snext,"tis") && !g_str_has_prefix(snext,"Tis")) - counters->open_single_quote++; + increment_matching(counters,c,TRUE); } else if (g_unichar_isalpha(g_utf8_get_char(sprev)) && g_unichar_isalpha(g_utf8_get_char(snext))) /* Do nothing! it's definitely an apostrophe, not a quote */ ; /* it's outside a word - let's check it out */ - else if (c==CHAR_OPEN_SQUOTE || + else if (c==CHAR_OPEN_SQUOTE || c==CHAR_LS_QUOTE || g_unichar_isalpha(g_utf8_get_char(snext))) { /* it damwell better BE an openquote */ if (!g_str_has_prefix(snext,"tis") && !g_str_has_prefix(snext,"Tis")) /* hardcode a very common exception! 
*/ - counters->open_single_quote++; + increment_matching(counters,c,TRUE); } else { @@ -926,7 +850,7 @@ guessquote+=8; /* looks like a closequote */ else guessquote++; - if (counters->open_single_quote>counters->close_single_quote) + if (matching_difference(counters,CHAR_SQUOTE)>0) /* * Give it the benefit of some doubt, * if a squote is already open. @@ -935,7 +859,7 @@ else guessquote--; if (guessquote>=0) - counters->close_single_quote++; + increment_matching(counters,c,FALSE); } } if (c!=CHAR_SPACE && c!='-' && c!='.' && c!=CHAR_ASTERISK && @@ -943,18 +867,11 @@ isemptyline=FALSE; /* ignore lines like * * * as spacers */ if (c==CHAR_UNDERSCORE) counters->c_unders++; - if (c==CHAR_OPEN_CBRACK) - counters->c_brack++; - if (c==CHAR_CLOSE_CBRACK) - counters->c_brack--; - if (c==CHAR_OPEN_RBRACK) - counters->r_brack++; - if (c==CHAR_CLOSE_RBRACK) - counters->r_brack--; - if (c==CHAR_OPEN_SBRACK) - counters->s_brack++; - if (c==CHAR_CLOSE_SBRACK) - counters->s_brack--; + if (c==CHAR_OPEN_CBRACK || c==CHAR_OPEN_RBRACK || c==CHAR_OPEN_SBRACK) + increment_matching(counters,c,TRUE); + if (c==CHAR_CLOSE_CBRACK || c==CHAR_CLOSE_RBRACK || + c==CHAR_CLOSE_SBRACK) + increment_matching(counters,c,FALSE); sprev=s; s=snext; } @@ -1423,12 +1340,12 @@ */ void check_for_extra_period(const char *aline,const struct warnings *warnings) { - const char *s,*t,*s1; + const char *s,*t,*s1,*sprev; int i; gsize len; gboolean istypo; gchar *testword; - gunichar *decomposition; + gunichar c,nc,pc,*decomposition; if (pswit[PARANOID_SWITCH]) { for (t=aline;t=strstr(t,". 
");) @@ -1452,8 +1369,9 @@ c3=g_utf8_get_char(g_utf8_offset_to_pointer(t,3)); c4=g_utf8_get_char(g_utf8_offset_to_pointer(t,4)); c5=g_utf8_get_char(g_utf8_offset_to_pointer(t,5)); - if (c2==CHAR_SQUOTE && g_unichar_islower(c3) && - c4==CHAR_SPACE && g_unichar_isupper(c5)) + if (CHAR_IS_APOSTROPHE(c2) && + g_unichar_islower(c3) && c4==CHAR_SPACE && + g_unichar_isupper(c5)) { t=g_utf8_next_char(t); continue; @@ -1468,14 +1386,22 @@ /* we have something to investigate */ istypo=TRUE; /* so let's go back and find out */ - for (s1=g_utf8_prev_char(t);s1>=aline && - (g_unichar_isalpha(g_utf8_get_char(s1)) || - g_unichar_isdigit(g_utf8_get_char(s1)) || - g_utf8_get_char(s1)==CHAR_SQUOTE && - g_unichar_isalpha(g_utf8_get_char(g_utf8_next_char(s1))) && - g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(s1)))); - s1=g_utf8_prev_char(s1)) - ; + nc=g_utf8_get_char(t); + s1=g_utf8_prev_char(t); + c=g_utf8_get_char(s1); + sprev=g_utf8_prev_char(s1); + pc=g_utf8_get_char(sprev); + while (s1>=aline && + (g_unichar_isalpha(c) || g_unichar_isdigit(c) || + g_unichar_isalpha(pc) && CHAR_IS_APOSTROPHE(c) && + g_unichar_isalpha(nc))) + { + nc=c; + s1=sprev; + c=pc; + sprev=g_utf8_prev_char(s1); + pc=g_utf8_get_char(sprev); + } s1=g_utf8_next_char(s1); s=strchr(s1,'.'); if (s) @@ -1600,7 +1526,7 @@ gchar *testword; int i,vowel,consonant,*dupcnt; gboolean isdup,istypo,alower; - gunichar c; + gunichar c,pc; long offset,len; gsize decomposition_len; for (s=aline;*s;) @@ -1646,11 +1572,14 @@ * French contractions like l'Abbe */ offset=g_utf8_pointer_to_offset(inword,t); + if (offset>0) + pc=g_utf8_get_char(g_utf8_prev_char(t)); + else + pc='\0'; if (offset==2 && c=='m' && g_utf8_get_char(nt)=='c' || offset==3 && c=='m' && g_utf8_get_char(nt)=='a' && g_utf8_get_char(g_utf8_next_char(nt))=='c' || - offset>0 && - g_utf8_get_char(g_utf8_prev_char(t))==CHAR_SQUOTE) + CHAR_IS_APOSTROPHE(pc)) ; /* do nothing! 
*/ else istypo=TRUE; @@ -2050,8 +1979,7 @@ { c=nc; nc=g_utf8_get_char(g_utf8_next_char(s)); - if ((c==CHAR_SQUOTE || c==CHAR_OPEN_SQUOTE) && (s==aline || - s>aline && + if (CHAR_IS_SQUOTE(c) && (s==aline || s>aline && !g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(s))) || !g_unichar_isalpha(nc))) { @@ -2166,7 +2094,11 @@ */ void check_for_spaced_quotes(const char *aline) { + int i; const char *s,*t; + const gunichar single_quotes[]={CHAR_SQUOTE,CHAR_OPEN_SQUOTE,CHAR_LS_QUOTE, + CHAR_RS_QUOTE}; + GString *pattern; s=aline; while ((t=strstr(s," \" "))) { @@ -2179,30 +2111,26 @@ cnt_punct++; s=g_utf8_next_char(g_utf8_next_char(t)); } - s=aline; - while ((t=strstr(s," ' "))) + pattern=g_string_new(NULL); + for(i=0;istr))) + { + if (pswit[ECHO_SWITCH]) + g_print("\n%s\n",aline); + if (!pswit[OVERVIEW_SWITCH]) + g_print(" Line %ld column %ld - Spaced singlequote?\n", + linecnt,g_utf8_pointer_to_offset(aline,t)+1); + else + cnt_punct++; + s=g_utf8_next_char(g_utf8_next_char(t)); + } } - s=aline; - while ((t=strstr(s," ` "))) - { - if (pswit[ECHO_SWITCH]) - g_print("\n%s\n",aline); - if (!pswit[OVERVIEW_SWITCH]) - g_print(" Line %ld column %ld - Spaced singlequote?\n", - linecnt,g_utf8_pointer_to_offset(aline,t)+1); - else - cnt_punct++; - s=g_utf8_next_char(g_utf8_next_char(t)); - } + g_string_free(pattern,TRUE); } /* @@ -2223,7 +2151,7 @@ pc=c; c=nc; nc=g_utf8_get_char(g_utf8_next_char(s)); - if (c==CHAR_SQUOTE && nc=='S' && g_unichar_islower(pc)) + if (CHAR_IS_APOSTROPHE(c) && nc=='S' && g_unichar_islower(pc)) { if (pswit[ECHO_SWITCH]) g_print("\n%s\n",aline); @@ -2255,8 +2183,7 @@ s=g_utf8_prev_char(aline+lbytes); c1=g_utf8_get_char(s); c2=g_utf8_get_char(g_utf8_prev_char(s)); - if ((c1==CHAR_DQUOTE || c1==CHAR_SQUOTE || c1==CHAR_OPEN_SQUOTE) && - c2==CHAR_SPACE) + if ((c1==CHAR_DQUOTE || CHAR_IS_SQUOTE(c1)) && c2==CHAR_SPACE) { if (pswit[ECHO_SWITCH]) g_print("\n%s\n",aline); @@ -2268,7 +2195,7 @@ } c1=g_utf8_get_char(aline); 
c2=g_utf8_get_char(g_utf8_next_char(aline)); - if ((c1==CHAR_SQUOTE || c1==CHAR_OPEN_SQUOTE) && c2==CHAR_SPACE) + if (CHAR_IS_SQUOTE(c1) && c2==CHAR_SPACE) { if (pswit[ECHO_SWITCH]) g_print("\n%s\n",aline); @@ -2470,8 +2397,7 @@ } if (pending->squote) { - if (c!=CHAR_SQUOTE && c!=CHAR_OPEN_SQUOTE || pswit[QPARA_SWITCH] || - pending->squot) + if (!CHAR_IS_SQUOTE(c) || pswit[QPARA_SWITCH] || pending->squot) { if (!pswit[OVERVIEW_SWITCH]) { @@ -2558,28 +2484,39 @@ void check_for_mismatched_quotes(const struct counters *counters, struct pending *pending) { + int squote_straight,squote_curved; if (counters->quot%2) pending->dquote= g_strdup_printf(" Line %ld - Mismatched quotes",linecnt); - if (pswit[SQUOTE_SWITCH] && counters->open_single_quote && - counters->open_single_quote!=counters->close_single_quote) - pending->squote= - g_strdup_printf(" Line %ld - Mismatched singlequotes?",linecnt); - if (pswit[SQUOTE_SWITCH] && counters->open_single_quote && - counters->open_single_quote!=counters->close_single_quote && - counters->open_single_quote!=counters->close_single_quote+1) - /* - * Flag it to be noted regardless of the - * first char of the next para. - */ - pending->squot=1; - if (counters->r_brack) + if (pswit[SQUOTE_SWITCH]) + { + if (matching_count(counters,CHAR_SQUOTE,TRUE)) + squote_straight=matching_difference(counters,CHAR_SQUOTE); + else + squote_straight=0; + if (matching_count(counters,CHAR_LS_QUOTE,TRUE)) + squote_curved=matching_difference(counters,CHAR_LS_QUOTE); + else + squote_curved=0; + if (squote_straight || squote_curved) + pending->squote= + g_strdup_printf(" Line %ld - Mismatched singlequotes?", + linecnt); + if (squote_straight && squote_straight!=1 || + squote_curved && squote_curved!=1) + /* + * Flag it to be noted regardless of the + * first char of the next para. 
+ */ + pending->squot=1; + } + if (matching_difference(counters,CHAR_OPEN_RBRACK)) pending->rbrack= g_strdup_printf(" Line %ld - Mismatched round brackets?",linecnt); - if (counters->s_brack) + if (matching_difference(counters,CHAR_OPEN_SBRACK)) pending->sbrack= g_strdup_printf(" Line %ld - Mismatched square brackets?",linecnt); - if (counters->c_brack) + if (matching_difference(counters,CHAR_OPEN_CBRACK)) pending->cbrack= g_strdup_printf(" Line %ld - Mismatched curly brackets?",linecnt); if (counters->c_unders%2) @@ -2603,6 +2540,7 @@ { gboolean letter_on_line=FALSE; const char *s; + gunichar c; for (s=prevline;*s;s=g_utf8_next_char(s)) if (g_unichar_isalpha(g_utf8_get_char(s))) { @@ -2619,12 +2557,12 @@ if (letter_on_line && last->blen>2 && start_para_lineCHAR_SPACE) { - for (s=g_utf8_prev_char(prevline+strlen(prevline)); - (g_utf8_get_char(s)==CHAR_DQUOTE || - g_utf8_get_char(s)==CHAR_SQUOTE) && - g_utf8_get_char(s)>CHAR_SPACE && s>prevline; - s=g_utf8_prev_char(s)) - ; + s=prevline+strlen(prevline); + do + { + s=g_utf8_prev_char(s); + c=g_utf8_get_char(s); + } while (CHAR_IS_CLOSING_QUOTE(c) && c>CHAR_SPACE && s>prevline); for (;s>prevline;s=g_utf8_prev_char(s)) { if (g_unichar_isalpha(g_utf8_get_char(s))) @@ -2857,6 +2795,7 @@ g_tree_foreach(qword,report_duplicate_queries,NULL); g_tree_unref(qword); g_tree_unref(qperiod); + counters_destroy(&counters); g_set_print_handler(NULL); print_as_windows_1252(NULL); if (pswit[MARKUP_SWITCH]) @@ -3066,10 +3005,10 @@ } /* we didn't find a punctuated number - do the regular getword thing */ g_string_truncate(word,0); - for (;g_unichar_isdigit(g_utf8_get_char(*ptr)) || - g_unichar_isalpha(g_utf8_get_char(*ptr)) || - g_utf8_get_char(*ptr)=='\'';*ptr=g_utf8_next_char(*ptr)) - g_string_append_unichar(word,g_utf8_get_char(*ptr)); + c=g_utf8_get_char(*ptr); + for (;g_unichar_isdigit(c) || g_unichar_isalpha(c) || CHAR_IS_APOSTROPHE(c); + *ptr=g_utf8_next_char(*ptr),c=g_utf8_get_char(*ptr)) + g_string_append_unichar(word,c); 
return g_string_free(word,FALSE); } diff -r 9a5f6d17e86e -r 7a62c77a0dbe bookloupe/bookloupe.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bookloupe/bookloupe.h Sat Sep 21 23:40:18 2013 +0100 @@ -0,0 +1,87 @@ +#ifndef BOOKLOUPE_H +#define BOOKLOUPE_H + +/* special characters */ +#define CHAR_SPACE 32 +#define CHAR_TAB 9 +#define CHAR_LF 10 +#define CHAR_CR 13 +#define CHAR_DQUOTE 34 +#define CHAR_SQUOTE 39 +#define CHAR_OPEN_SQUOTE 96 +#define CHAR_TILDE 126 +#define CHAR_ASTERISK 42 +#define CHAR_FORESLASH 47 +#define CHAR_CARAT 94 + +#define CHAR_UNDERSCORE '_' +#define CHAR_OPEN_CBRACK '{' +#define CHAR_CLOSE_CBRACK '}' +#define CHAR_OPEN_RBRACK '(' +#define CHAR_CLOSE_RBRACK ')' +#define CHAR_OPEN_SBRACK '[' +#define CHAR_CLOSE_SBRACK ']' + +#define CHAR_LS_QUOTE 0x2018 +#define CHAR_RS_QUOTE 0x2019 + +#define CHAR_IS_SQUOTE(c) ((c)==CHAR_SQUOTE || (c)==CHAR_OPEN_SQUOTE || \ + (c)==CHAR_LS_QUOTE || (c)==CHAR_RS_QUOTE) + +#define CHAR_IS_APOSTROPHE(c) ((c)==CHAR_SQUOTE || (c)==CHAR_RS_QUOTE) + +#define CHAR_IS_CLOSING_QUOTE(c) \ + ((c)==CHAR_DQUOTE || (c)==CHAR_SQUOTE || (c)==CHAR_RS_QUOTE) + +/* longest and shortest normal PG line lengths */ +#define LONGEST_PG_LINE 75 +#define WAY_TOO_LONG 80 +#define SHORTEST_PG_LINE 55 + +enum { + ECHO_SWITCH, + SQUOTE_SWITCH, + TYPO_SWITCH, + QPARA_SWITCH, + PARANOID_SWITCH, + LINE_END_SWITCH, + OVERVIEW_SWITCH, + STDOUT_SWITCH, + HEADER_SWITCH, + WEB_SWITCH, + VERBOSE_SWITCH, + MARKUP_SWITCH, + USERTYPO_SWITCH, + DP_SWITCH, + SWITNO +}; + +struct first_pass_results { + long firstline,astline; + long footerline,totlen,binlen,alphalen,endquote_count,shortline,dotcomma; + long fslashline,hyphens,longline,verylongline,htmcount,standalone_digit; + long spacedash,emdash,space_emdash,non_PG_space_emdash,PG_space_emdash; + int Dutchcount,Frenchcount; +}; + +struct warnings { + int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen; + int endquote; + gboolean isDutch,isFrench; +}; + +struct line_properties { + 
unsigned int len,blen; + gunichar start; +}; + +struct parities { + int dquote,squote; +}; + +struct pending { + char *dquote,*squote,*rbrack,*sbrack,*cbrack,*unders; + long squot; +}; + +#endif /* BOOKLOUPE_H */ diff -r 9a5f6d17e86e -r 7a62c77a0dbe bookloupe/counters.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bookloupe/counters.c Sat Sep 21 23:40:18 2013 +0100 @@ -0,0 +1,106 @@ +#include +#include +#include "bookloupe.h" +#include "counters.h" + +struct matching_counter { + int open,close; +}; + +static struct matching_counter *matching_counter_new(void) +{ + return g_slice_new0(struct matching_counter); +} + +static void matching_counter_free(struct matching_counter *counter) +{ + g_slice_free(struct matching_counter,counter); +} + +static gint compar_unichars(gconstpointer a,gconstpointer b,gpointer unused) +{ + /* + * Unicode code points only go up to 0x10FFFF and thus this cannot overflow. + */ + return GPOINTER_TO_INT(a)-GPOINTER_TO_INT(b); +} + +/* + * For matching characters, we maintain a count of the opens and closes. + * In the simplest case, we are dealing with a matching pair such as [ and ] + * where there is a 1:1 mapping between an instance of [ with an open and + * between an instance of ] with a close. matching_key() is + * responsible for selecting an arbitrary base character of a matching pair. 
+ */ +static gpointer matching_key(gunichar ch) +{ + gunichar mirrored; + if (g_unichar_get_mirror_char(ch,&mirrored)) + if (chmatching) + counters->matching=g_tree_new_full(compar_unichars,NULL,NULL, + (GDestroyNotify)matching_counter_free); + key=matching_key(ch); + if (!g_tree_lookup_extended(counters->matching,key,&orig_key, + (gpointer *)&value)) + { + value=matching_counter_new(); + g_tree_insert(counters->matching,key,value); + } + if (open) + value->open++; + else + value->close++; +} + +int matching_count(const struct counters *counters,gunichar ch,gboolean open) +{ + struct matching_counter *value; + if (!counters->matching) + return 0; + value=g_tree_lookup(counters->matching,matching_key(ch)); + if (!value) + return 0; + return open?value->open:value->close; +} + +/* + * Return open count - closed count + */ +int matching_difference(const struct counters *counters,gunichar ch) +{ + struct matching_counter *value; + if (!counters->matching) + return 0; + value=g_tree_lookup(counters->matching,matching_key(ch)); + if (!value) + return 0; + return value->open-value->close; +} + +void counters_destroy(struct counters *counters) +{ + if (counters->matching) + { + g_tree_destroy(counters->matching); + counters->matching=NULL; + } +} diff -r 9a5f6d17e86e -r 7a62c77a0dbe bookloupe/counters.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bookloupe/counters.h Sat Sep 21 23:40:18 2013 +0100 @@ -0,0 +1,17 @@ +#ifndef COUNTERS_H +#define COUNTERS_H + +#include + +struct counters { + GTree *matching; + long quot; + int c_unders; +}; + +void increment_matching(struct counters *counters,gunichar ch,gboolean open); +int matching_count(const struct counters *counters,gunichar ch,gboolean open); +int matching_difference(const struct counters *counters,gunichar ch); +void counters_destroy(struct counters *counters); + +#endif /* COUNTERS_H */ diff -r 9a5f6d17e86e -r 7a62c77a0dbe doc/bookloupe.txt --- a/doc/bookloupe.txt Tue Sep 17 20:55:57 2013 +0100 +++ 
b/doc/bookloupe.txt Sat Sep 21 23:40:18 2013 +0100 @@ -77,8 +77,8 @@ to see all unclosed quotes, even where the next paragraph begins with a quote, you should use the -p switch. - Singlequotes (') are a problem, since the same character - is used for an apostrophe. I'm not sure that it is + Singlequotes (' and ’) are a problem, since the same + character is used for an apostrophe. I'm not sure that it is possible to get 100% accuracy on singlequotes checking, particularly since dialect, quite common in PG texts, upsets the normal rules so badly. Consider the sentence: diff -r 9a5f6d17e86e -r 7a62c77a0dbe test/bookloupe/Makefile.am --- a/test/bookloupe/Makefile.am Tue Sep 17 20:55:57 2013 +0100 +++ b/test/bookloupe/Makefile.am Sat Sep 21 23:40:18 2013 +0100 @@ -1,4 +1,5 @@ TESTS_ENVIRONMENT=BOOKLOUPE=../../bookloupe/bookloupe ../harness/loupe-test -TESTS=non-ascii.tst long-line.tst +TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst \ + curved-genitives.tst dist_pkgdata_DATA=$(TESTS) diff -r 9a5f6d17e86e -r 7a62c77a0dbe test/bookloupe/curved-genitives.tst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/bookloupe/curved-genitives.tst Sat Sep 21 23:40:18 2013 +0100 @@ -0,0 +1,12 @@ +**************** INPUT **************** +The genitive case of single nouns is normally formed like this: + +The fireworks known as Serpent’s Eggs, or PHARAOH’S SERPENTS. + +What should never happen is something like this: + +At this suggestion Nellie’S face grew crimson. +**************** EXPECTED **************** + +At this suggestion Nellie’S face grew crimson. + Line 7 column 27 - Capital "S"? diff -r 9a5f6d17e86e -r 7a62c77a0dbe test/bookloupe/curved-single-quotes.tst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/bookloupe/curved-single-quotes.tst Sat Sep 21 23:40:18 2013 +0100 @@ -0,0 +1,55 @@ +**************** OPTIONS **************** +-s +**************** INPUT **************** +‘Now you should start for school, ’ Margaret said. 
+ +‘In a moment,’ Peter replied,‘ I'm just coming.’ + +‘Come on,’shouted Jane. + +‘Alright,’ said Peter, ‘Keep your hair on. +’ He looked down as he came round the corner. +‘Where's my coat? ’ + +`Underneath the girls’ scarves.’ said his mother. + +Grabbing it, he joined the others as they set out. +**************** WARNINGS **************** + + + + Spaced singlequote? + + + + Wrongspaced singlequotes? + + + + Wrongspaced singlequotes? + + + + Mismatched singlequotes? + + + + Spaced quote? + + + + Spaced quote? + + + + Mismatched singlequotes? + + + + Wrongspaced singlequotes? + + + + Mismatched singlequotes? + + diff -r 9a5f6d17e86e -r 7a62c77a0dbe test/compatibility/Makefile.am --- a/test/compatibility/Makefile.am Tue Sep 17 20:55:57 2013 +0100 +++ b/test/compatibility/Makefile.am Sat Sep 21 23:40:18 2013 +0100 @@ -7,6 +7,6 @@ dashes.tst control-characters.tst unusual-characters.tst \ windows-1252.tst periods.tst long-line.tst unmarked-paragraph.tst \ hebe-jeebies.tst mail-from.tst scannos.tst before-comma.tst \ - before-period.tst double-punctuation.tst genatives.tst embedded-cr.tst + before-period.tst double-punctuation.tst genitives.tst embedded-cr.tst dist_pkgdata_DATA=$(TESTS) diff -r 9a5f6d17e86e -r 7a62c77a0dbe test/compatibility/genatives.tst --- a/test/compatibility/genatives.tst Tue Sep 17 20:55:57 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -**************** INPUT **************** -The genative case of single nouns is normally formed like this: - -The fireworks known as Serpent's Eggs, or PHARAOH'S SERPENTS. - -What should never happen is something like this: - -At this suggestion Nellie'S face grew crimson. -**************** EXPECTED **************** - -At this suggestion Nellie'S face grew crimson. - Line 7 column 27 - Capital "S"? 
diff -r 9a5f6d17e86e -r 7a62c77a0dbe test/compatibility/genitives.tst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compatibility/genitives.tst Sat Sep 21 23:40:18 2013 +0100 @@ -0,0 +1,12 @@ +**************** INPUT **************** +The genitive case of single nouns is normally formed like this: + +The fireworks known as Serpent's Eggs, or PHARAOH'S SERPENTS. + +What should never happen is something like this: + +At this suggestion Nellie'S face grew crimson. +**************** EXPECTED **************** + +At this suggestion Nellie'S face grew crimson. + Line 7 column 27 - Capital "S"?