# HG changeset patch # User ali # Date 1369935224 -3600 # Node ID 52d4a7f926b4e5fb787fef05decfd4a88457a2c5 # Parent 82d3cc398b54e0737093bb229cacf7d2d13ef2c8 Support WINDOWS-1252 characters encoded as UTF-8 diff -r 82d3cc398b54 -r 52d4a7f926b4 bookloupe/bookloupe.c --- a/bookloupe/bookloupe.c Thu May 30 17:16:37 2013 +0100 +++ b/bookloupe/bookloupe.c Thu May 30 18:33:44 2013 +0100 @@ -230,6 +230,8 @@ void loseentities(char *); gboolean isroman(const char *); void postprocess_for_DP(char *); +void print_as_windows_1252(const char *string); +void print_as_utf_8(const char *string); GTree *qword,*qperiod; @@ -371,7 +373,10 @@ g_clear_error(&err); exit(1); } - utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL); + if (g_utf8_validate(contents,len,NULL)) + utf8=g_utf8_normalize(contents,len,G_NORMALIZE_DEFAULT_COMPOSE); + else + utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL); g_free(contents); lines=g_strsplit_set(utf8,"\r\n",0); g_free(utf8); @@ -396,7 +401,16 @@ gsize len,nb; if (!g_file_get_contents(filename,&contents,&len,err)) return NULL; - utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL); + if (g_utf8_validate(contents,len,NULL)) + { + utf8=g_utf8_normalize(contents,len,G_NORMALIZE_DEFAULT_COMPOSE); + g_set_print_handler(print_as_utf_8); + } + else + { + utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL); + g_set_print_handler(print_as_windows_1252); + } g_free(contents); return utf8; } @@ -2618,6 +2632,11 @@ fputs(string,stdout); } +void print_as_utf_8(const char *string) +{ + fputs(string,stdout); +} + /* * procfile: * @@ -2650,7 +2669,6 @@ fprintf(stderr,"bookloupe: %s: %s\n",filename,err->message); exit(1); } - g_set_print_handler(print_as_windows_1252); g_print("\n\nFile: %s\n\n",filename); first_pass_results=first_pass(etext); warnings=report_first_pass(first_pass_results); diff -r 82d3cc398b54 -r 52d4a7f926b4 configure.ac --- a/configure.ac Thu May 30 17:16:37 2013 +0100 +++ b/configure.ac Thu May 30 18:33:44 2013 +0100 @@ -11,6 +11,7 @@ test/Makefile test/harness/Makefile test/compatibility/Makefile +test/bookloupe/Makefile doc/Makefile ]) AM_INIT_AUTOMAKE([no-define 1.11]) diff -r 82d3cc398b54 -r 52d4a7f926b4 test/Makefile.am --- a/test/Makefile.am Thu May 30 17:16:37 2013 +0100 +++ b/test/Makefile.am Thu May 30 18:33:44 2013 +0100 @@ -1,1 +1,1 @@ -SUBDIRS=harness compatibility . +SUBDIRS=harness compatibility bookloupe . diff -r 82d3cc398b54 -r 52d4a7f926b4 test/bookloupe/Makefile.am --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/bookloupe/Makefile.am Thu May 30 18:33:44 2013 +0100 @@ -0,0 +1,4 @@ +TESTS_ENVIRONMENT=BOOKLOUPE=../../bookloupe/bookloupe ../harness/loupe-test +TESTS=non-ascii.tst long-line.tst + +dist_pkgdata_DATA=$(TESTS) diff -r 82d3cc398b54 -r 52d4a7f926b4 test/bookloupe/long-line.tst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/bookloupe/long-line.tst Thu May 30 18:33:44 2013 +0100 @@ -0,0 +1,37 @@ +**************** INPUT **************** +Lines up to seventy five columns should be acceptable and shouldn't trigger +any kind of warning. At seventy six columns, however, one warning is issued. + +Les élèves ont mangés leur petit déjeuner avant le commencement de l'école. +Les pains au chocolat et les petit brioches sont le choix le plus délicieux. + +Unfortunately, with two long lines, we need to drivel on for at least +twenty lines so that more than ninety per cent of the text consists of +non-long lines so that the warnings are not switched off in a misguided +attempt at being helpful. + +“I love to sail the briny deep! + The briny deep for me! +I love to watch the sunlit waves + That brighten up the sea! +I love to listen to the wind + That fills the snowy sails! +I love to roam around the deck----” + + “And eat the fishes’ tails!” +**************** WARNINGS **************** + + + + Long line 76 + + + + Long line 76 + + + + + Query word au - not reporting duplicates + + diff -r 82d3cc398b54 -r 52d4a7f926b4 test/bookloupe/non-ascii.tst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/bookloupe/non-ascii.tst Thu May 30 18:33:44 2013 +0100 @@ -0,0 +1,6 @@ +**************** INPUT **************** +"Hello," he said, "I wanted to bave a tête-à-tête with you." +**************** EXPECTED **************** + +"Hello," he said, "I wanted to bave a tête-à-tête with you." + Line 1 column 31 - Query word bave - not reporting duplicates diff -r 82d3cc398b54 -r 52d4a7f926b4 test/compatibility/unmarked-paragraph.tst --- a/test/compatibility/unmarked-paragraph.tst Thu May 30 17:16:37 2013 +0100 +++ b/test/compatibility/unmarked-paragraph.tst Thu May 30 18:33:44 2013 +0100 @@ -2,7 +2,7 @@ "Hurrah! that's the way to do it!" "Now, then, Tom, see if you can't bring Dick home!" -"Give him a swift one, Frank! Don’t let him hit it!" cried Sam Rover, +"Give him a swift one, Frank! Don't let him hit it!" cried Sam Rover, merrily. **************** WARNINGS ****************