1.1 --- a/bookloupe/bookloupe.c Wed Oct 02 23:51:18 2013 +0100
1.2 +++ b/bookloupe/bookloupe.c Wed Oct 02 23:58:38 2013 +0100
1.3 @@ -32,6 +32,9 @@
1.4 #include "pending.h"
1.5 #include "HTMLentities.h"
1.6
1.7 +gchar *charset; /* Or NULL for auto (ISO_8859-1/ASCII or UNICODE) */
1.8 +GIConv charset_validator=(GIConv)-1;
1.9 +
1.10 gchar *prevline;
1.11
1.12 /* Common typos. */
1.13 @@ -127,36 +130,101 @@
1.14 };
1.15
1.16 gboolean pswit[SWITNO]; /* program switches */
1.17 +gchar *opt_charset;
1.18 +
1.19 +gboolean typo_compat,paranoid_compat;
1.20
1.21 static GOptionEntry options[]={
1.22 { "dp", 'd', 0, G_OPTION_ARG_NONE, pswit+DP_SWITCH,
1.23 "Ignore DP-specific markup", NULL },
1.24 - { "noecho", 'e', 0, G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
1.25 + { "no-dp", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
1.26 + G_OPTION_ARG_NONE, pswit+DP_SWITCH,
1.27 + "Don't ignore DP-specific markup", NULL },
1.28 + { "echo", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
1.29 + "Echo queried line", NULL },
1.30 + { "no-echo", 'e', G_OPTION_FLAG_REVERSE,
1.31 + G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
1.32 "Don't echo queried line", NULL },
1.33 { "squote", 's', 0, G_OPTION_ARG_NONE, pswit+SQUOTE_SWITCH,
1.34 "Check single quotes", NULL },
1.35 - { "typo", 't', 0, G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
1.36 + { "no-squote", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
1.37 + G_OPTION_ARG_NONE, pswit+SQUOTE_SWITCH,
1.38 + "Don't check single quotes", NULL },
1.39 + { "typo", 0, 0, G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
1.40 "Check common typos", NULL },
1.41 + { "no-typo", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
1.42 + G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
1.43 + "Don't check common typos", NULL },
1.44 { "qpara", 'p', 0, G_OPTION_ARG_NONE, pswit+QPARA_SWITCH,
1.45 "Require closure of quotes on every paragraph", NULL },
1.46 - { "relaxed", 'x', 0, G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
1.47 + { "no-qpara", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
1.48 + G_OPTION_ARG_NONE, pswit+QPARA_SWITCH,
1.49 + "Don't require closure of quotes on every paragraph", NULL },
1.50 + { "paranoid", 0, G_OPTION_FLAG_HIDDEN,
1.51 + G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
1.52 + "Enable paranoid querying of everything", NULL },
1.53 + { "no-paranoid", 0, G_OPTION_FLAG_REVERSE,
1.54 + G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
1.55 "Disable paranoid querying of everything", NULL },
1.56 - { "line-end", 'l', 0, G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
1.57 - "Disable line end checking", NULL },
1.58 + { "line-end", 0, G_OPTION_FLAG_HIDDEN,
1.59 + G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
1.60 + "Enable line end checking", NULL },
1.61 + { "no-line-end", 'l', G_OPTION_FLAG_REVERSE,
1.62 + G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
1.63 + "Disable line end checking", NULL },
1.64 { "overview", 'o', 0, G_OPTION_ARG_NONE, pswit+OVERVIEW_SWITCH,
1.65 "Overview: just show counts", NULL },
1.66 + { "no-overview", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
1.67 + G_OPTION_ARG_NONE, pswit+OVERVIEW_SWITCH,
1.68 + "Show individual warnings", NULL },
1.69 { "stdout", 'y', 0, G_OPTION_ARG_NONE, pswit+STDOUT_SWITCH,
1.70 "Output errors to stdout instead of stderr", NULL },
1.71 + { "no-stdout", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
1.72 + G_OPTION_ARG_NONE, pswit+STDOUT_SWITCH,
1.73 + "Output errors to stderr instead of stdout", NULL },
1.74 { "header", 'h', 0, G_OPTION_ARG_NONE, pswit+HEADER_SWITCH,
1.75 "Echo header fields", NULL },
1.76 + { "no-header", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
1.77 + G_OPTION_ARG_NONE, pswit+HEADER_SWITCH,
1.78 + "Don't echo header fields", NULL },
1.79 { "markup", 'm', 0, G_OPTION_ARG_NONE, pswit+MARKUP_SWITCH,
1.80 "Ignore markup in < >", NULL },
1.81 + { "no-markup", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
1.82 + G_OPTION_ARG_NONE, pswit+MARKUP_SWITCH,
1.83 + "No special handling for markup in < >", NULL },
1.84 { "usertypo", 'u', 0, G_OPTION_ARG_NONE, pswit+USERTYPO_SWITCH,
1.85 "Use file of user-defined typos", NULL },
1.86 + { "no-usertypo", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
1.87 + G_OPTION_ARG_NONE, pswit+USERTYPO_SWITCH,
1.88 + "Ignore file of user-defined typos", NULL },
1.89 + { "verbose", 'v', 0, G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
1.90 + "Verbose - list everything", NULL },
1.91 + { "no-verbose", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
1.92 + G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
1.93 + "Switch off verbose mode", NULL },
1.94 + { "charset", 0, 0, G_OPTION_ARG_STRING, &opt_charset,
1.95 + "Set of characters valid for this ebook", "NAME" },
1.96 + { NULL }
1.97 +};
1.98 +
1.99 +/*
1.100 + * Options relating to configuration which make no sense from inside
1.101 + * a configuration file.
1.102 + */
1.103 +
1.104 +static GOptionEntry config_options[]={
1.105 { "web", 'w', 0, G_OPTION_ARG_NONE, pswit+WEB_SWITCH,
1.106 "Defaults for use on www upload", NULL },
1.107 - { "verbose", 'v', 0, G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
1.108 - "Verbose - list everything", NULL },
1.109 + { "dump-config", 0, 0, G_OPTION_ARG_NONE, pswit+DUMP_CONFIG_SWITCH,
1.110 + "Dump current config settings", NULL },
1.111 + { NULL }
1.112 +};
1.113 +
1.114 +static GOptionEntry compatibility_options[]={
1.115 + { "toggle-typo", 't', 0, G_OPTION_ARG_NONE, &typo_compat,
1.116 + "Toggle checking for common typos", NULL },
1.117 + { "toggle-relaxed", 'x', 0, G_OPTION_ARG_NONE, &paranoid_compat,
1.118 + "Toggle both paranoid mode and common typos", NULL },
1.119 { NULL }
1.120 };
1.121
1.122 @@ -200,31 +268,275 @@
1.123 UINT saved_cp;
1.124 #endif
1.125
1.126 +gboolean set_charset(const char *name,GError **err)
1.127 +{
1.128 + /* The various UNICODE encodings all share the same character set. */
1.129 + const char *unicode_aliases[]={ "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4",
1.130 + "UCS-4BE", "UCS-4LE", "UCS2", "UCS4", "UNICODE", "UNICODEBIG",
1.131 + "UNICODELITTLE", "UTF-7", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
1.132 + "UTF-32", "UTF-32BE", "UTF-32LE", "UTF7", "UTF8", "UTF16", "UTF16BE",
1.133 + "UTF16LE", "UTF32", "UTF32BE", "UTF32LE" };
1.134 + int i;
1.135 + if (charset)
1.136 + g_free(charset);
1.137 + if (charset_validator!=(GIConv)-1)
1.138 + g_iconv_close(charset_validator);
1.139 + if (!name || !g_strcasecmp(name,"auto"))
1.140 + {
1.141 + charset=NULL;
1.142 + charset_validator=(GIConv)-1;
1.143 + return TRUE;
1.144 + }
1.145 + else
1.146 + charset=g_strdup(name);
1.147 + for(i=0;i<G_N_ELEMENTS(unicode_aliases);i++)
1.148 + if (!g_strcasecmp(charset,unicode_aliases[i]))
1.149 + {
1.150 + g_free(charset);
1.151 + charset=g_strdup("UTF-8");
1.152 + break;
1.153 + }
1.154 + if (!strcmp(charset,"UTF-8"))
1.155 + charset_validator=(GIConv)-1;
1.156 + else
1.157 + {
1.158 + charset_validator=g_iconv_open(charset,"UTF-8");
1.159 + if (charset_validator==(GIConv)-1)
1.160 + {
1.161 + g_set_error(err,G_CONVERT_ERROR,G_CONVERT_ERROR_NO_CONVERSION,
1.162 + "Unknown character set \"%s\"",charset);
1.163 + return FALSE;
1.164 + }
1.165 + }
1.166 + return TRUE;
1.167 +}
1.168 +
1.169 +GKeyFile *config;
1.170 +
1.171 +void config_file_update(GKeyFile *kf)
1.172 +{
1.173 + int i;
1.174 + const char *s;
1.175 + gboolean sw;
1.176 + for(i=0;options[i].long_name;i++)
1.177 + {
1.178 + if (g_str_has_prefix(options[i].long_name,"no-"))
1.179 + continue;
1.180 + if (options[i].arg==G_OPTION_ARG_NONE)
1.181 + {
1.182 + sw=*(gboolean *)options[i].arg_data;
1.183 + if (options[i].flags&G_OPTION_FLAG_REVERSE)
1.184 + sw=!sw;
1.185 + g_key_file_set_boolean(kf,"options",options[i].long_name,sw);
1.186 + }
1.187 + else if (options[i].arg==G_OPTION_ARG_STRING)
1.188 + {
1.189 + s=*(gchar **)options[i].arg_data;
1.190 + if (!s)
1.191 + s="auto";
1.192 + g_key_file_set_string(kf,"options",options[i].long_name,s);
1.193 + }
1.194 + else
1.195 + g_assert_not_reached();
1.196 + }
1.197 +}
1.198 +
1.199 +void config_file_add_comments(GKeyFile *kf)
1.200 +{
1.201 + int i;
1.202 + gchar *comment;
1.203 + g_key_file_set_comment(kf,NULL,NULL," Default configuration for bookloupe",
1.204 + NULL);
1.205 + for(i=0;options[i].long_name;i++)
1.206 + {
1.207 + if (g_str_has_prefix(options[i].long_name,"no-"))
1.208 + continue;
1.209 + comment=g_strconcat(" ",options[i].description,NULL);
1.210 + g_key_file_set_comment(kf,"options",options[i].long_name,comment,NULL);
1.211 + g_free(comment);
1.212 + }
1.213 +}
1.214 +
1.215 +void dump_config(void)
1.216 +{
1.217 + gchar *s;
1.218 + if (config)
1.219 + config_file_update(config);
1.220 + else
1.221 + {
1.222 + config=g_key_file_new();
1.223 + config_file_update(config);
1.224 + config_file_add_comments(config);
1.225 + }
1.226 + s=g_key_file_to_data(config,NULL,NULL);
1.227 + if (s)
1.228 + g_print("%s",s);
1.229 + g_free(s);
1.230 +}
1.231 +
1.232 +GKeyFile *read_config_file(gchar **full_path)
1.233 +{
1.234 + int i;
1.235 + GError *err=NULL;
1.236 + gchar **search_dirs;
1.237 + gchar *path;
1.238 + const char *search_path;
1.239 + GKeyFile *kf;
1.240 + kf=g_key_file_new();
1.241 + search_path=g_getenv("BOOKLOUPE_CONFIG_PATH");
1.242 + if (search_path)
1.243 + {
1.244 +#ifdef __WIN32__
1.245 + search_dirs=g_strsplit(search_path,";",0);
1.246 +#else
1.247 + search_dirs=g_strsplit(search_path,":",0);
1.248 +#endif
1.249 + }
1.250 + else
1.251 + {
1.252 + search_dirs=g_new(gchar *,4);
1.253 + search_dirs[0]=g_get_current_dir();
1.254 + search_dirs[1]=g_strdup(running_from);
1.255 + search_dirs[2]=g_strdup(g_get_user_config_dir());
1.256 + search_dirs[3]=NULL;
1.257 + }
1.258 + for(i=0;search_dirs[i];i++)
1.259 + {
1.260 + path=g_build_filename(search_dirs[i],"bookloupe.ini",NULL);
1.261 + if (g_key_file_load_from_file(kf,path,
1.262 + G_KEY_FILE_KEEP_COMMENTS|G_KEY_FILE_KEEP_TRANSLATIONS,&err))
1.263 + break;
1.264 + if (!g_error_matches(err,G_FILE_ERROR,G_FILE_ERROR_NOENT))
1.265 + {
1.266 + g_printerr("Bookloupe: Error reading %s\n",path);
1.267 + g_printerr("%s\n",err->message);
1.268 + exit(1);
1.269 + }
1.270 + g_clear_error(&err);
1.271 + g_free(path);
1.272 + path=NULL;
1.273 + }
1.274 + if (!search_dirs[i])
1.275 + {
1.276 + g_key_file_free(kf);
1.277 + kf=NULL;
1.278 + }
1.279 + g_strfreev(search_dirs);
1.280 + if (full_path && kf)
1.281 + *full_path=path;
1.282 + else
1.283 + g_free(path);
1.284 + return kf;
1.285 +}
1.286 +
1.287 +void parse_config_file(void)
1.288 +{
1.289 + int i,j;
1.290 + gchar *path,*s;
1.291 + gchar **keys;
1.292 + gboolean sw;
1.293 + GError *err=NULL;
1.294 + config=read_config_file(&path);
1.295 + if (config)
1.296 + keys=g_key_file_get_keys(config,"options",NULL,NULL);
1.297 + else
1.298 + keys=NULL;
1.299 + if (keys)
1.300 + {
1.301 + for(i=0;keys[i];i++)
1.302 + {
1.303 + for(j=0;options[j].long_name;j++)
1.304 + {
1.305 + if (g_str_has_prefix(options[j].long_name,"no-"))
1.306 + continue;
1.307 + else if (!strcmp(keys[i],options[j].long_name))
1.308 + {
1.309 + if (options[j].arg==G_OPTION_ARG_NONE)
1.310 + {
1.311 + sw=g_key_file_get_boolean(config,"options",keys[i],
1.312 + &err);
1.313 + if (err)
1.314 + {
1.315 + g_printerr("Bookloupe: %s: options.%s: %s\n",
1.316 + path,keys[i],err->message);
1.317 + g_clear_error(&err);
1.318 + }
1.319 + else
1.320 + {
1.321 + if (options[j].flags&G_OPTION_FLAG_REVERSE)
1.322 + sw=!sw;
1.323 + *(gboolean *)options[j].arg_data=sw;
1.324 + }
1.325 + break;
1.326 + }
1.327 + else if (options[j].arg==G_OPTION_ARG_STRING)
1.328 + {
1.329 + s=g_key_file_get_string(config,"options",keys[i],
1.330 + &err);
1.331 + if (err)
1.332 + {
1.333 + g_printerr("Bookloupe: %s: options.%s: %s\n",
1.334 + path,keys[i],err->message);
1.335 + g_clear_error(&err);
1.336 + }
1.337 + else
1.338 + {
1.339 + g_free(*(gchar **)options[j].arg_data);
1.340 + if (!g_strcmp0(s,"auto"))
1.341 + {
1.342 + *(gchar **)options[j].arg_data=NULL;
1.343 + g_free(s);
1.344 + }
1.345 + else
1.346 + *(gchar **)options[j].arg_data=s;
1.347 + }
1.348 + break;
1.349 + }
1.350 + else
1.351 + g_assert_not_reached();
1.352 + }
1.353 + }
1.354 + if (!options[j].long_name)
1.355 + g_printerr("Bookloupe: %s: Unknown option \"%s\" ignored\n",
1.356 + path,keys[i]);
1.357 + }
1.358 + g_strfreev(keys);
1.359 + }
1.360 + if (config)
1.361 + g_free(path);
1.362 +}
1.363 +
1.364 void parse_options(int *argc,char ***argv)
1.365 {
1.366 GError *err=NULL;
1.367 GOptionContext *context;
1.368 + GOptionGroup *compatibility;
1.369 context=g_option_context_new(
1.370 - "file - looks for errors in Project Gutenberg(TM) etexts");
1.371 + "file - look for errors in Project Gutenberg(TM) etexts");
1.372 g_option_context_add_main_entries(context,options,NULL);
1.373 + g_option_context_add_main_entries(context,config_options,NULL);
1.374 + compatibility=g_option_group_new("compatibility",
1.375 + "Options for Compatibility with Gutcheck:",
1.376 + "Show compatibility options",NULL,NULL);
1.377 + g_option_group_add_entries(compatibility,compatibility_options);
1.378 + g_option_context_add_group(context,compatibility);
1.379 + g_option_context_set_description(context,
1.380 + "For simplicity, only the switch options which reverse the\n"
1.381 + "default configuration are listed. In most cases, both vanilla\n"
1.382 + "and \"no-\" prefixed versions are available for use.");
1.383 if (!g_option_context_parse(context,argc,argv,&err))
1.384 {
1.385 g_printerr("Bookloupe: %s\n",err->message);
1.386 g_printerr("Use \"%s --help\" for help\n",(*argv)[0]);
1.387 exit(1);
1.388 }
1.389 - /* Paranoid checking is turned OFF, not on, by its switch */
1.390 - pswit[PARANOID_SWITCH]=!pswit[PARANOID_SWITCH];
1.391 - if (pswit[PARANOID_SWITCH])
1.392 - /* if running in paranoid mode, typo checks default to enabled */
1.393 + if (typo_compat)
1.394 pswit[TYPO_SWITCH]=!pswit[TYPO_SWITCH];
1.395 - /* Line-end checking is turned OFF, not on, by its switch */
1.396 - pswit[LINE_END_SWITCH]=!pswit[LINE_END_SWITCH];
1.397 - /* Echoing is turned OFF, not on, by its switch */
1.398 - pswit[ECHO_SWITCH]=!pswit[ECHO_SWITCH];
1.399 - if (pswit[OVERVIEW_SWITCH])
1.400 - /* just print summary; don't echo */
1.401 - pswit[ECHO_SWITCH]=FALSE;
1.402 + if (paranoid_compat)
1.403 + {
1.404 + pswit[PARANOID_SWITCH]=!pswit[PARANOID_SWITCH];
1.405 + pswit[TYPO_SWITCH]=!pswit[TYPO_SWITCH];
1.406 + }
1.407 /*
1.408 * Web uploads - for the moment, this is really just a placeholder
1.409 * until we decide what processing we really want to do on web uploads
1.410 @@ -246,6 +558,21 @@
1.411 pswit[USERTYPO_SWITCH]=FALSE;
1.412 pswit[DP_SWITCH]=FALSE;
1.413 }
1.414 + if (opt_charset && !set_charset(opt_charset,&err))
1.415 + {
1.416 + g_printerr("%s\n",err->message);
1.417 + exit(1);
1.418 + }
1.419 + if (pswit[DUMP_CONFIG_SWITCH])
1.420 + {
1.421 + dump_config();
1.422 + exit(0);
1.423 + }
1.424 + g_free(opt_charset);
1.425 + opt_charset=NULL;
1.426 + if (pswit[OVERVIEW_SWITCH])
1.427 + /* just print summary; don't echo */
1.428 + pswit[ECHO_SWITCH]=FALSE;
1.429 if (*argc<2)
1.430 {
1.431 proghelp(context);
1.432 @@ -305,7 +632,11 @@
1.433 exit(1);
1.434 }
1.435 if (g_utf8_validate(contents,len,NULL))
1.436 + {
1.437 utf8=g_utf8_normalize(contents,len,G_NORMALIZE_DEFAULT_COMPOSE);
1.438 + if (!charset)
1.439 + (void)set_charset("UNICODE",NULL);
1.440 + }
1.441 else
1.442 utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
1.443 g_free(contents);
1.444 @@ -388,6 +719,15 @@
1.445 saved_cp=GetConsoleOutputCP();
1.446 #endif
1.447 running_from=g_path_get_dirname(argv[0]);
1.448 + /* Paranoid checking is turned OFF, not on, by its switch */
1.449 + pswit[PARANOID_SWITCH]=TRUE;
1.450 + /* if running in paranoid mode, typo checks default to enabled */
1.451 + pswit[TYPO_SWITCH]=TRUE;
1.452 + /* Line-end checking is turned OFF, not on, by its switch */
1.453 + pswit[LINE_END_SWITCH]=TRUE;
1.454 + /* Echoing is turned OFF, not on, by its switch */
1.455 + pswit[ECHO_SWITCH]=TRUE;
1.456 + parse_config_file();
1.457 parse_options(&argc,&argv);
1.458 if (pswit[USERTYPO_SWITCH])
1.459 read_user_scannos();
1.460 @@ -428,6 +768,9 @@
1.461 g_free(running_from);
1.462 if (usertypo)
1.463 g_tree_unref(usertypo);
1.464 + set_charset(NULL,NULL);
1.465 + if (config)
1.466 + g_key_file_free(config);
1.467 return 0;
1.468 }
1.469
1.470 @@ -708,25 +1051,32 @@
1.471 "Not reporting them.\n",
1.472 results->spacedash+results->non_PG_space_emdash);
1.473 }
1.474 - /* If more than a quarter of characters are hi-bit, bug out. */
1.475 - warnings.bin=1;
1.476 - if (results->binlen*4>results->totlen)
1.477 + if (charset)
1.478 + warnings.bin=0;
1.479 + else
1.480 {
1.481 - g_print(" --> This file does not appear to be ASCII. "
1.482 - "Terminating. Best of luck with it!\n");
1.483 - exit(1);
1.484 - }
1.485 - if (results->alphalen*4<results->totlen)
1.486 - {
1.487 - g_print(" --> This file does not appear to be text. "
1.488 - "Terminating. Best of luck with it!\n");
1.489 - exit(1);
1.490 - }
1.491 - if (results->binlen*100>results->totlen || results->binlen>100)
1.492 - {
1.493 - g_print(" --> There are a lot of foreign letters here. "
1.494 - "Not reporting them.\n");
1.495 - warnings.bin=0;
1.496 + /* Charset ISO_8859-1/ASCII checks for compatibility with gutcheck */
1.497 + warnings.bin=1;
1.498 + /* If more than a quarter of characters are hi-bit, bug out. */
1.499 + if (results->binlen*4>results->totlen)
1.500 + {
1.501 + g_print(" --> This file does not appear to be ASCII. "
1.502 + "Terminating. Best of luck with it!\n");
1.503 + exit(1);
1.504 + }
1.505 + if (results->alphalen*4<results->totlen)
1.506 + {
1.507 + g_print(" --> This file does not appear to be text. "
1.508 + "Terminating. Best of luck with it!\n");
1.509 + exit(1);
1.510 + }
1.511 + if (results->binlen*100>results->totlen || results->binlen>100)
1.512 + {
1.513 + g_print(" --> There are a lot of foreign letters here. "
1.514 + "Not reporting them.\n");
1.515 + if (!pswit[VERBOSE_SWITCH])
1.516 + warnings.bin=0;
1.517 + }
1.518 }
1.519 warnings.isDutch=FALSE;
1.520 if (results->Dutchcount>50)
1.521 @@ -754,7 +1104,6 @@
1.522 g_print("\n");
1.523 if (pswit[VERBOSE_SWITCH])
1.524 {
1.525 - warnings.bin=1;
1.526 warnings.shortline=1;
1.527 warnings.dotcomma=1;
1.528 warnings.longline=1;
1.529 @@ -949,14 +1298,17 @@
1.530 gboolean isemptyline)
1.531 {
1.532 /* Don't repeat multiple warnings on one line. */
1.533 - gboolean eNon_A=FALSE,eTab=FALSE,eTilde=FALSE;
1.534 + gboolean eInvalidChar=FALSE,eTab=FALSE,eTilde=FALSE;
1.535 gboolean eCarat=FALSE,eFSlash=FALSE,eAst=FALSE;
1.536 const char *s;
1.537 gunichar c;
1.538 + gsize nb;
1.539 + gchar *t;
1.540 for (s=aline;*s;s=g_utf8_next_char(s))
1.541 {
1.542 c=g_utf8_get_char(s);
1.543 - if (!eNon_A && (c<CHAR_SPACE && c!='\t' && c!='\n' || c>127))
1.544 + if (warnings->bin && !eInvalidChar &&
1.545 + (c<CHAR_SPACE && c!='\t' && c!='\n' || c>127))
1.546 {
1.547 if (pswit[ECHO_SWITCH])
1.548 g_print("\n%s\n",aline);
1.549 @@ -971,7 +1323,57 @@
1.550 linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
1.551 else
1.552 cnt_bin++;
1.553 - eNon_A=TRUE;
1.554 + eInvalidChar=TRUE;
1.555 + }
1.556 + if (!eInvalidChar && charset)
1.557 + {
1.558 + if (charset_validator==(GIConv)-1)
1.559 + {
1.560 + if (!g_unichar_isdefined(c))
1.561 + {
1.562 + if (pswit[ECHO_SWITCH])
1.563 + g_print("\n%s\n",aline);
1.564 + if (!pswit[OVERVIEW_SWITCH])
1.565 + g_print(" Line %ld column %ld - Unassigned UNICODE "
1.566 + "code point U+%04" G_GINT32_MODIFIER "X\n",
1.567 + linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
1.568 + else
1.569 + cnt_bin++;
1.570 + eInvalidChar=TRUE;
1.571 + }
1.572 + else if (c>=0xE000 && c<=0xF8FF || c>=0xF0000 && c<=0xFFFFD ||
1.573 + c>=0x100000 && c<=0x10FFFD)
1.574 + {
1.575 + if (pswit[ECHO_SWITCH])
1.576 + g_print("\n%s\n",aline);
1.577 + if (!pswit[OVERVIEW_SWITCH])
1.578 + g_print(" Line %ld column %ld - Private Use "
1.579 + "character U+%04" G_GINT32_MODIFIER "X\n",
1.580 + linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
1.581 + else
1.582 + cnt_bin++;
1.583 + eInvalidChar=TRUE;
1.584 + }
1.585 + }
1.586 + else
1.587 + {
1.588 + t=g_convert_with_iconv(s,g_utf8_next_char(s)-s,
1.589 + charset_validator,NULL,&nb,NULL);
1.590 + if (t)
1.591 + g_free(t);
1.592 + else
1.593 + {
1.594 + if (pswit[ECHO_SWITCH])
1.595 + g_print("\n%s\n",aline);
1.596 + if (!pswit[OVERVIEW_SWITCH])
1.597 + g_print(" Line %ld column %ld - Non-%s "
1.598 + "character %u\n",linecnt,
1.599 + g_utf8_pointer_to_offset(aline,s)+1,charset,c);
1.600 + else
1.601 + cnt_bin++;
1.602 + eInvalidChar=TRUE;
1.603 + }
1.604 + }
1.605 }
1.606 if (!eTab && c==CHAR_TAB)
1.607 {
1.608 @@ -2626,8 +3028,7 @@
1.609 if (s>=aline && g_utf8_get_char(s)=='-')
1.610 enddash=TRUE;
1.611 check_for_control_characters(aline);
1.612 - if (warnings->bin)
1.613 - check_for_odd_characters(aline,warnings,isemptyline);
1.614 + check_for_odd_characters(aline,warnings,isemptyline);
1.615 if (warnings->longline)
1.616 check_for_long_line(aline);
1.617 if (warnings->shortline)