1.1 --- a/bookloupe/bookloupe.c Thu May 30 17:16:37 2013 +0100
1.2 +++ b/bookloupe/bookloupe.c Thu May 30 18:33:44 2013 +0100
1.3 @@ -230,6 +230,8 @@
1.4 void loseentities(char *);
1.5 gboolean isroman(const char *);
1.6 void postprocess_for_DP(char *);
1.7 +void print_as_windows_1252(const char *string); /* g_print handler; installed when the input file is not valid UTF-8 */
1.8 +void print_as_utf_8(const char *string); /* g_print handler; installed when the input file validates as UTF-8 */
1.9
1.10 GTree *qword,*qperiod;
1.11
1.12 @@ -371,7 +373,10 @@
1.13 g_clear_error(&err);
1.14 exit(1);
1.15 }
1.16 - utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
1.17 + if (g_utf8_validate(contents,len,NULL))
1.18 + utf8=g_utf8_normalize(contents,len,G_NORMALIZE_DEFAULT_COMPOSE);
1.19 + else
1.20 + utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
1.21 g_free(contents);
1.22 lines=g_strsplit_set(utf8,"\r\n",0);
1.23 g_free(utf8);
1.24 @@ -396,7 +401,16 @@
1.25 gsize len,nb;
1.26 if (!g_file_get_contents(filename,&contents,&len,err))
1.27 return NULL;
1.28 - utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
1.29 + if (g_utf8_validate(contents,len,NULL))
1.30 + {
1.31 + utf8=g_utf8_normalize(contents,len,G_NORMALIZE_DEFAULT_COMPOSE);
1.32 + g_set_print_handler(print_as_utf_8);
1.33 + }
1.34 + else
1.35 + {
1.36 + utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
1.37 + g_set_print_handler(print_as_windows_1252);
1.38 + }
1.39 g_free(contents);
1.40 return utf8;
1.41 }
1.42 @@ -2618,6 +2632,11 @@
1.43 fputs(string,stdout);
1.44 }
1.45
1.46 +void print_as_utf_8(const char *string) /* g_print handler: write string to stdout with no re-encoding */
1.47 +{
1.48 + fputs(string,stdout);
1.49 +}
1.50 +
1.51 /*
1.52 * procfile:
1.53 *
1.54 @@ -2650,7 +2669,6 @@
1.55 fprintf(stderr,"bookloupe: %s: %s\n",filename,err->message);
1.56 exit(1);
1.57 }
1.58 - g_set_print_handler(print_as_windows_1252);
1.59 g_print("\n\nFile: %s\n\n",filename);
1.60 first_pass_results=first_pass(etext);
1.61 warnings=report_first_pass(first_pass_results);