1.1 --- a/bookloupe/bookloupe.c Thu May 30 17:16:37 2013 +0100
1.2 +++ b/bookloupe/bookloupe.c Thu May 30 18:33:44 2013 +0100
1.3 @@ -230,6 +230,8 @@
1.4 void loseentities(char *);
1.5 gboolean isroman(const char *);
1.6 void postprocess_for_DP(char *);
1.7 +void print_as_windows_1252(const char *string);
1.8 +void print_as_utf_8(const char *string);
1.9
1.10 GTree *qword,*qperiod;
1.11
1.12 @@ -371,7 +373,10 @@
1.13 g_clear_error(&err);
1.14 exit(1);
1.15 }
1.16 - utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
1.17 + if (g_utf8_validate(contents,len,NULL))
1.18 + utf8=g_utf8_normalize(contents,len,G_NORMALIZE_DEFAULT_COMPOSE);
1.19 + else
1.20 + utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
1.21 g_free(contents);
1.22 lines=g_strsplit_set(utf8,"\r\n",0);
1.23 g_free(utf8);
1.24 @@ -396,7 +401,16 @@
1.25 gsize len,nb;
1.26 if (!g_file_get_contents(filename,&contents,&len,err))
1.27 return NULL;
1.28 - utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
1.29 + if (g_utf8_validate(contents,len,NULL))
1.30 + {
1.31 + utf8=g_utf8_normalize(contents,len,G_NORMALIZE_DEFAULT_COMPOSE);
1.32 + g_set_print_handler(print_as_utf_8);
1.33 + }
1.34 + else
1.35 + {
1.36 + utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
1.37 + g_set_print_handler(print_as_windows_1252);
1.38 + }
1.39 g_free(contents);
1.40 return utf8;
1.41 }
1.42 @@ -2618,6 +2632,11 @@
1.43 fputs(string,stdout);
1.44 }
1.45
1.46 +void print_as_utf_8(const char *string)
1.47 +{
1.48 + fputs(string,stdout);
1.49 +}
1.50 +
1.51 /*
1.52 * procfile:
1.53 *
1.54 @@ -2650,7 +2669,6 @@
1.55 fprintf(stderr,"bookloupe: %s: %s\n",filename,err->message);
1.56 exit(1);
1.57 }
1.58 - g_set_print_handler(print_as_windows_1252);
1.59 g_print("\n\nFile: %s\n\n",filename);
1.60 first_pass_results=first_pass(etext);
1.61 warnings=report_first_pass(first_pass_results);
2.1 --- a/configure.ac Thu May 30 17:16:37 2013 +0100
2.2 +++ b/configure.ac Thu May 30 18:33:44 2013 +0100
2.3 @@ -11,6 +11,7 @@
2.4 test/Makefile
2.5 test/harness/Makefile
2.6 test/compatibility/Makefile
2.7 +test/bookloupe/Makefile
2.8 doc/Makefile
2.9 ])
2.10 AM_INIT_AUTOMAKE([no-define 1.11])
3.1 --- a/test/Makefile.am Thu May 30 17:16:37 2013 +0100
3.2 +++ b/test/Makefile.am Thu May 30 18:33:44 2013 +0100
3.3 @@ -1,1 +1,1 @@
3.4 -SUBDIRS=harness compatibility .
3.5 +SUBDIRS=harness compatibility bookloupe .
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
4.2 +++ b/test/bookloupe/Makefile.am Thu May 30 18:33:44 2013 +0100
4.3 @@ -0,0 +1,4 @@
4.4 +TESTS_ENVIRONMENT=BOOKLOUPE=../../bookloupe/bookloupe ../harness/loupe-test
4.5 +TESTS=non-ascii.tst long-line.tst
4.6 +
4.7 +dist_pkgdata_DATA=$(TESTS)
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
5.2 +++ b/test/bookloupe/long-line.tst Thu May 30 18:33:44 2013 +0100
5.3 @@ -0,0 +1,37 @@
5.4 +**************** INPUT ****************
5.5 +Lines up to seventy five columns should be acceptable and shouldn't trigger
5.6 +any kind of warning. At seventy six columns, however, one warning is issued.
5.7 +
5.8 +Les élèves ont mangés leur petit déjeuner avant le commencement de l'école.
5.9 +Les pains au chocolat et les petit brioches sont le choix le plus délicieux.
5.10 +
5.11 +Unfortunately, with two long lines, we need to drivel on for at least
5.12 +twenty lines so that more than ninety per cent of the text consists of
5.13 +non-long lines so that the warnings are not switched off in a misguided
5.14 +attempt at being helpful.
5.15 +
5.16 +“I love to sail the briny deep!
5.17 + The briny deep for me!
5.18 +I love to watch the sunlit waves
5.19 + That brighten up the sea!
5.20 +I love to listen to the wind
5.21 + That fills the snowy sails!
5.22 +I love to roam around the deck----”
5.23 +
5.24 + “And eat the fishes’ tails!”
5.25 +**************** WARNINGS ****************
5.26 +<expected>
5.27 + <error>
5.28 + <at line="2" column="76"/>
5.29 + <text>Long line 76</text>
5.30 + </error>
5.31 + <error>
5.32 + <at line="5" column="76"/>
5.33 + <text>Long line 76</text>
5.34 + </error>
5.35 + <false-positive>
5.36 + <at line="5" column="9"/>
5.37 + <at line="5" column="10"/>
5.38 + <text>Query word au - not reporting duplicates</text>
5.39 + </false-positive>
5.40 +</expected>
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
6.2 +++ b/test/bookloupe/non-ascii.tst Thu May 30 18:33:44 2013 +0100
6.3 @@ -0,0 +1,6 @@
6.4 +**************** INPUT ****************
6.5 +"Hello," he said, "I wanted to bave a tête-à-tête with you."
6.6 +**************** EXPECTED ****************
6.7 +
6.8 +"Hello," he said, "I wanted to bave a tête-à-tête with you."
6.9 + Line 1 column 31 - Query word bave - not reporting duplicates
7.1 --- a/test/compatibility/unmarked-paragraph.tst Thu May 30 17:16:37 2013 +0100
7.2 +++ b/test/compatibility/unmarked-paragraph.tst Thu May 30 18:33:44 2013 +0100
7.3 @@ -2,7 +2,7 @@
7.4 "Hurrah! that's the way to do it!" "Now, then, Tom, see if you can't bring
7.5 Dick home!"
7.6
7.7 -"Give him a swift one, Frank! Don’t let him hit it!" cried Sam Rover,
7.8 +"Give him a swift one, Frank! Don't let him hit it!" cried Sam Rover,
7.9 merrily.
7.10 **************** WARNINGS ****************
7.11 <expected>