bookloupe/bookloupe.c
changeset 144 d7a97f077f9e
parent 142 466f43a12118
parent 138 5e27fa988c5c
     1.1 --- a/bookloupe/bookloupe.c	Wed Oct 02 23:51:18 2013 +0100
     1.2 +++ b/bookloupe/bookloupe.c	Wed Oct 02 23:58:38 2013 +0100
     1.3 @@ -32,6 +32,9 @@
     1.4  #include "pending.h"
     1.5  #include "HTMLentities.h"
     1.6  
     1.7 +gchar *charset;		/* Or NULL for auto (ISO_8859-1/ASCII or UNICODE) */
     1.8 +GIConv charset_validator=(GIConv)-1;	/* UTF-8 to charset converter, or (GIConv)-1 when none is open */
     1.9 +
    1.10  gchar *prevline;
    1.11  
    1.12  /* Common typos. */
    1.13 @@ -127,36 +130,101 @@
    1.14  }; 
    1.15  
    1.16  gboolean pswit[SWITNO];  /* program switches */
    1.17 +gchar *opt_charset;	/* Charset name from --charset or the config file; parse_options() passes it to set_charset() */
    1.18 +
    1.19 +gboolean typo_compat,paranoid_compat;	/* Set by -t and -x; applied as toggles in parse_options() */
    1.20  
    1.21  static GOptionEntry options[]={
    1.22      { "dp", 'd', 0, G_OPTION_ARG_NONE, pswit+DP_SWITCH,
    1.23        "Ignore DP-specific markup", NULL },
    1.24 -    { "noecho", 'e', 0, G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
    1.25 +    { "no-dp", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,	/* REVERSE: giving the option clears the switch */
    1.26 +      G_OPTION_ARG_NONE, pswit+DP_SWITCH,
    1.27 +      "Don't ignore DP-specific markup", NULL },
    1.28 +    { "echo", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
    1.29 +      "Echo queried line", NULL },
    1.30 +    { "no-echo", 'e', G_OPTION_FLAG_REVERSE,
    1.31 +      G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
    1.32        "Don't echo queried line", NULL },
    1.33      { "squote", 's', 0, G_OPTION_ARG_NONE, pswit+SQUOTE_SWITCH,
    1.34        "Check single quotes", NULL },
    1.35 -    { "typo", 't', 0, G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
    1.36 +    { "no-squote", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    1.37 +      G_OPTION_ARG_NONE, pswit+SQUOTE_SWITCH,
    1.38 +      "Don't check single quotes", NULL },
    1.39 +    { "typo", 0, 0, G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
    1.40        "Check common typos", NULL },
    1.41 +    { "no-typo", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    1.42 +      G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
    1.43 +      "Don't check common typos", NULL },
    1.44      { "qpara", 'p', 0, G_OPTION_ARG_NONE, pswit+QPARA_SWITCH,
    1.45        "Require closure of quotes on every paragraph", NULL },
    1.46 -    { "relaxed", 'x', 0, G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
    1.47 +    { "no-qpara", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    1.48 +      G_OPTION_ARG_NONE, pswit+QPARA_SWITCH,
    1.49 +      "Don't require closure of quotes on every paragraph", NULL },
    1.50 +    { "paranoid", 0, G_OPTION_FLAG_HIDDEN,
    1.51 +      G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
    1.52 +      "Enable paranoid querying of everything", NULL },
    1.53 +    { "no-paranoid", 0, G_OPTION_FLAG_REVERSE,
    1.54 +      G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
    1.55        "Disable paranoid querying of everything", NULL },
    1.56 -    { "line-end", 'l', 0, G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
    1.57 -      "Disable line end checking", NULL },
    1.58 +    { "line-end", 0, G_OPTION_FLAG_HIDDEN,
    1.59 +      G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
    1.60 +      "Enable line end checking", NULL },
    1.61 +    { "no-line-end", 'l', G_OPTION_FLAG_REVERSE,
    1.62 +      G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
    1.63 +      "Disable line end checking", NULL },
    1.64      { "overview", 'o', 0, G_OPTION_ARG_NONE, pswit+OVERVIEW_SWITCH,
    1.65        "Overview: just show counts", NULL },
    1.66 +    { "no-overview", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    1.67 +      G_OPTION_ARG_NONE, pswit+OVERVIEW_SWITCH,
    1.68 +      "Show individual warnings", NULL },
    1.69      { "stdout", 'y', 0, G_OPTION_ARG_NONE, pswit+STDOUT_SWITCH,
    1.70        "Output errors to stdout instead of stderr", NULL },
    1.71 +    { "no-stdout", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    1.72 +      G_OPTION_ARG_NONE, pswit+STDOUT_SWITCH,
    1.73 +      "Output errors to stderr instead of stdout", NULL },
    1.74      { "header", 'h', 0, G_OPTION_ARG_NONE, pswit+HEADER_SWITCH,
    1.75        "Echo header fields", NULL },
    1.76 +    { "no-header", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    1.77 +      G_OPTION_ARG_NONE, pswit+HEADER_SWITCH,
    1.78 +      "Don't echo header fields", NULL },
    1.79      { "markup", 'm', 0, G_OPTION_ARG_NONE, pswit+MARKUP_SWITCH,
    1.80        "Ignore markup in < >", NULL },
    1.81 +    { "no-markup", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    1.82 +      G_OPTION_ARG_NONE, pswit+MARKUP_SWITCH,
    1.83 +      "No special handling for markup in < >", NULL },
    1.84      { "usertypo", 'u', 0, G_OPTION_ARG_NONE, pswit+USERTYPO_SWITCH,
    1.85        "Use file of user-defined typos", NULL },
    1.86 +    { "no-usertypo", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    1.87 +      G_OPTION_ARG_NONE, pswit+USERTYPO_SWITCH,
    1.88 +      "Ignore file of user-defined typos", NULL },
    1.89 +    { "verbose", 'v', 0, G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
    1.90 +      "Verbose - list everything", NULL },
    1.91 +    { "no-verbose", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    1.92 +      G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
    1.93 +      "Switch off verbose mode", NULL },
    1.94 +    { "charset", 0, 0, G_OPTION_ARG_STRING, &opt_charset,
    1.95 +      "Set of characters valid for this ebook", "NAME" },
    1.96 +    { NULL }
    1.97 +};
    1.98 +
    1.99 +/*
   1.100 + * Options relating to configuration which make no sense from inside a
   1.101 + * configuration file; the config file code only walks options[] above.
   1.102 + */
   1.103 +
   1.104 +static GOptionEntry config_options[]={
   1.105      { "web", 'w', 0, G_OPTION_ARG_NONE, pswit+WEB_SWITCH,
   1.106        "Defaults for use on www upload", NULL },
   1.107 -    { "verbose", 'v', 0, G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
   1.108 -      "Verbose - list everything", NULL },
   1.109 +    { "dump-config", 0, 0, G_OPTION_ARG_NONE, pswit+DUMP_CONFIG_SWITCH,
   1.110 +      "Dump current config settings", NULL },
   1.111 +    { NULL }
   1.112 +};
   1.113 +
   1.114 +static GOptionEntry compatibility_options[]={	/* Gutcheck-style toggles; parse_options() applies them after parsing */
   1.115 +    { "toggle-typo", 't', 0, G_OPTION_ARG_NONE, &typo_compat,
   1.116 +      "Toggle checking for common typos", NULL },
   1.117 +    { "toggle-relaxed", 'x', 0, G_OPTION_ARG_NONE, &paranoid_compat,
   1.118 +      "Toggle both paranoid mode and common typos", NULL },
   1.119      { NULL }
   1.120  };
   1.121  
   1.122 @@ -200,31 +268,275 @@
   1.123  UINT saved_cp;
   1.124  #endif
   1.125  
   1.126 +/* Set the validation charset (NULL or "auto": autodetect); FALSE if unknown. */
   1.127 +gboolean set_charset(const char *name,GError **err)
   1.128 +{
   1.129 +    /* The various UNICODE encodings all share the same character set. */
   1.130 +    const char *unicode_aliases[]={ "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4",
   1.131 +      "UCS-4BE", "UCS-4LE", "UCS2", "UCS4", "UNICODE", "UNICODEBIG",
   1.132 +      "UNICODELITTLE", "UTF-7", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
   1.133 +      "UTF-32", "UTF-32BE", "UTF-32LE", "UTF7", "UTF8", "UTF16", "UTF16BE",
   1.134 +      "UTF16LE", "UTF32", "UTF32BE", "UTF32LE" };
   1.135 +    gsize i;
   1.136 +    gchar *wanted=NULL;
   1.137 +    if (name && g_ascii_strcasecmp(name,"auto"))
   1.138 +	wanted=g_strdup(name);	/* Copy first: name may alias charset */
   1.139 +    g_free(charset);
   1.140 +    charset=NULL;
   1.141 +    if (charset_validator!=(GIConv)-1)
   1.142 +	g_iconv_close(charset_validator);
   1.143 +    charset_validator=(GIConv)-1;
   1.144 +    if (!wanted)
   1.145 +	return TRUE;
   1.146 +    for(i=0;i<G_N_ELEMENTS(unicode_aliases);i++)
   1.147 +	if (!g_ascii_strcasecmp(wanted,unicode_aliases[i]))
   1.148 +	{
   1.149 +	    g_free(wanted);
   1.150 +	    wanted=g_strdup("UTF-8");
   1.151 +	    break;
   1.152 +	}
   1.153 +    /* UTF-8 needs no converter: (GIConv)-1 selects the UNICODE checks. */
   1.154 +    if (strcmp(wanted,"UTF-8"))
   1.155 +    {
   1.156 +	charset_validator=g_iconv_open(wanted,"UTF-8");
   1.157 +	if (charset_validator==(GIConv)-1)
   1.158 +	{
   1.159 +	    g_set_error(err,G_CONVERT_ERROR,G_CONVERT_ERROR_NO_CONVERSION,
   1.160 +	      "Unknown character set \"%s\"",wanted);
   1.161 +	    g_free(wanted);	/* Leave auto selected, not a half-set state */
   1.162 +	    return FALSE;
   1.163 +	}
   1.164 +    }
   1.165 +    charset=wanted;
   1.166 +    return TRUE;
   1.167 +}
   1.168 +
   1.169 +GKeyFile *config;	/* Key file loaded by parse_config_file(); NULL if none was found */
   1.170 +
   1.171 +/* Record the current value of every main option under [options] in kf. */
   1.172 +void config_file_update(GKeyFile *kf)
   1.173 +{
   1.174 +    const GOptionEntry *opt;
   1.175 +    const char *text;
   1.176 +    gboolean on;
   1.177 +    for(opt=options;opt->long_name;opt++)
   1.178 +    {
   1.179 +	if (g_str_has_prefix(opt->long_name,"no-"))
   1.180 +	    continue;	/* Shares its variable with the un-prefixed entry */
   1.181 +	switch(opt->arg)
   1.182 +	{
   1.183 +	    case G_OPTION_ARG_NONE:
   1.184 +		on=*(gboolean *)opt->arg_data;
   1.185 +		g_key_file_set_boolean(kf,"options",opt->long_name,
   1.186 +		  (opt->flags&G_OPTION_FLAG_REVERSE)?!on:on);
   1.187 +		break;
   1.188 +	    case G_OPTION_ARG_STRING:
   1.189 +		text=*(gchar **)opt->arg_data;
   1.190 +		g_key_file_set_string(kf,"options",opt->long_name,
   1.191 +		  text?text:"auto");	/* An unset string is written as "auto" */
   1.192 +		break;
   1.193 +	    default:
   1.194 +		g_assert_not_reached();
   1.195 +	}
   1.196 +    }
   1.197 +}
   1.198 +
   1.199 +/* Attach each main option's description to its key in kf as a comment. */
   1.200 +void config_file_add_comments(GKeyFile *kf)
   1.201 +{
   1.202 +    const GOptionEntry *opt;
   1.203 +    gchar *text;
   1.204 +    g_key_file_set_comment(kf,NULL,NULL," Default configuration for bookloupe",
   1.205 +      NULL);
   1.206 +    for(opt=options;opt->long_name;opt++)
   1.207 +	if (!g_str_has_prefix(opt->long_name,"no-"))
   1.208 +	{
   1.209 +	    text=g_strconcat(" ",opt->description,NULL);
   1.210 +	    g_key_file_set_comment(kf,"options",opt->long_name,text,NULL);
   1.211 +	    g_free(text);
   1.212 +	}
   1.213 +}
   1.214 +
   1.215 +/* Print the current settings with g_print() in key file format. */
   1.216 +void dump_config(void)
   1.217 +{
   1.218 +    gboolean fresh=!config;
   1.219 +    gchar *text;
   1.220 +    if (fresh)
   1.221 +	config=g_key_file_new();
   1.222 +    config_file_update(config);
   1.223 +    /* Only a key file created here gets the generated comments. */
   1.224 +    if (fresh)
   1.225 +	config_file_add_comments(config);
   1.226 +    text=g_key_file_to_data(config,NULL,NULL);
   1.227 +    if (text)
   1.228 +	g_print("%s",text);
   1.229 +    g_free(text);
   1.230 +}
   1.231 +
   1.232 +/* Load the first bookloupe.ini found on the search path, or return NULL. */
   1.233 +GKeyFile *read_config_file(gchar **full_path)
   1.234 +{
   1.235 +    int i;
   1.236 +    GError *err=NULL;
   1.237 +    gchar **search_dirs;
   1.238 +    gchar *path=NULL;	/* Must be safe to g_free() even if nothing is searched */
   1.239 +    const char *search_path;
   1.240 +    GKeyFile *kf=g_key_file_new();
   1.241 +    search_path=g_getenv("BOOKLOUPE_CONFIG_PATH");
   1.242 +    if (search_path)
   1.243 +    {
   1.244 +#ifdef __WIN32__
   1.245 +	search_dirs=g_strsplit(search_path,";",0);
   1.246 +#else
   1.247 +	search_dirs=g_strsplit(search_path,":",0);
   1.248 +#endif
   1.249 +    }
   1.250 +    else
   1.251 +    {
   1.252 +	search_dirs=g_new(gchar *,4);	/* Three default directories + NULL */
   1.253 +	search_dirs[0]=g_get_current_dir();
   1.254 +	search_dirs[1]=g_strdup(running_from);
   1.255 +	search_dirs[2]=g_strdup(g_get_user_config_dir());
   1.256 +	search_dirs[3]=NULL;
   1.257 +    }
   1.258 +    for(i=0;search_dirs[i];i++)
   1.259 +    {
   1.260 +	path=g_build_filename(search_dirs[i],"bookloupe.ini",NULL);
   1.261 +	if (g_key_file_load_from_file(kf,path,
   1.262 +	  G_KEY_FILE_KEEP_COMMENTS|G_KEY_FILE_KEEP_TRANSLATIONS,&err))
   1.263 +	    break;
   1.264 +	if (!g_error_matches(err,G_FILE_ERROR,G_FILE_ERROR_NOENT))	/* Only a missing file is tolerated */
   1.265 +	{
   1.266 +	    g_printerr("Bookloupe: Error reading %s\n",path);
   1.267 +	    g_printerr("%s\n",err->message);
   1.268 +	    exit(1);
   1.269 +	}
   1.270 +	g_clear_error(&err);
   1.271 +	g_free(path);
   1.272 +	path=NULL;
   1.273 +    }
   1.274 +    if (!search_dirs[i])	/* Ran off the end of the list: no file was loaded */
   1.275 +    {
   1.276 +	g_key_file_free(kf);
   1.277 +	kf=NULL;
   1.278 +    }
   1.279 +    g_strfreev(search_dirs);
   1.280 +    if (full_path && kf)
   1.281 +	*full_path=path;	/* Caller takes ownership of the path string */
   1.282 +    else
   1.283 +	g_free(path);
   1.284 +    return kf;
   1.285 +}
   1.286 +
   1.287 +void parse_config_file(void)	/* Apply the [options] group of the config file, if one is found */
   1.288 +{
   1.289 +    int i,j;
   1.290 +    gchar *path,*s;
   1.291 +    gchar **keys;
   1.292 +    gboolean sw;
   1.293 +    GError *err=NULL;
   1.294 +    config=read_config_file(&path);	/* path is only set when a file was loaded */
   1.295 +    if (config)
   1.296 +	keys=g_key_file_get_keys(config,"options",NULL,NULL);
   1.297 +    else
   1.298 +	keys=NULL;
   1.299 +    if (keys)
   1.300 +    {
   1.301 +	for(i=0;keys[i];i++)
   1.302 +	{
   1.303 +	    for(j=0;options[j].long_name;j++)
   1.304 +	    {
   1.305 +		if (g_str_has_prefix(options[j].long_name,"no-"))
   1.306 +		    continue;	/* "no-" entries are never matched against keys */
   1.307 +		else if (!strcmp(keys[i],options[j].long_name))
   1.308 +		{
   1.309 +		    if (options[j].arg==G_OPTION_ARG_NONE)
   1.310 +		    {
   1.311 +			sw=g_key_file_get_boolean(config,"options",keys[i],
   1.312 +			  &err);
   1.313 +			if (err)
   1.314 +			{
   1.315 +			    g_printerr("Bookloupe: %s: options.%s: %s\n",
   1.316 +			      path,keys[i],err->message);
   1.317 +			    g_clear_error(&err);	/* Report and keep the current setting */
   1.318 +			}
   1.319 +			else
   1.320 +			{
   1.321 +			    if (options[j].flags&G_OPTION_FLAG_REVERSE)
   1.322 +				sw=!sw;
   1.323 +			    *(gboolean *)options[j].arg_data=sw;
   1.324 +			}
   1.325 +			break;	/* Leaves options[j].long_name non-NULL: key recognised */
   1.326 +		    }
   1.327 +		    else if (options[j].arg==G_OPTION_ARG_STRING)
   1.328 +		    {
   1.329 +			s=g_key_file_get_string(config,"options",keys[i],
   1.330 +			  &err);
   1.331 +			if (err)
   1.332 +			{
   1.333 +			    g_printerr("Bookloupe: %s: options.%s: %s\n",
   1.334 +			      path,keys[i],err->message);
   1.335 +			    g_clear_error(&err);
   1.336 +			}
   1.337 +			else
   1.338 +			{
   1.339 +			    g_free(*(gchar **)options[j].arg_data);	/* Drop any previous value */
   1.340 +			    if (!g_strcmp0(s,"auto"))
   1.341 +			    {
   1.342 +				*(gchar **)options[j].arg_data=NULL;	/* "auto" is stored as NULL */
   1.343 +				g_free(s);
   1.344 +			    }
   1.345 +			    else
   1.346 +				*(gchar **)options[j].arg_data=s;	/* The variable now owns s */
   1.347 +			}
   1.348 +			break;
   1.349 +		    }
   1.350 +		    else
   1.351 +			g_assert_not_reached();
   1.352 +		}
   1.353 +	    }
   1.354 +	    if (!options[j].long_name)	/* Inner loop ran to the end: nothing matched */
   1.355 +		g_printerr("Bookloupe: %s: Unknown option \"%s\" ignored\n",
   1.356 +		  path,keys[i]);
   1.357 +	}
   1.358 +	g_strfreev(keys);
   1.359 +    }
   1.360 +    if (config)
   1.361 +	g_free(path);
   1.362 +}
   1.363 +
   1.364  void parse_options(int *argc,char ***argv)
   1.365  {
   1.366      GError *err=NULL;
   1.367      GOptionContext *context;
   1.368 +    GOptionGroup *compatibility;
   1.369      context=g_option_context_new(
   1.370 -      "file - looks for errors in Project Gutenberg(TM) etexts");
   1.371 +      "file - look for errors in Project Gutenberg(TM) etexts");
   1.372      g_option_context_add_main_entries(context,options,NULL);
   1.373 +    g_option_context_add_main_entries(context,config_options,NULL);
   1.374 +    compatibility=g_option_group_new("compatibility",
   1.375 +      "Options for Compatibility with Gutcheck:",
   1.376 +      "Show compatibility options",NULL,NULL);
   1.377 +    g_option_group_add_entries(compatibility,compatibility_options);
   1.378 +    g_option_context_add_group(context,compatibility);
   1.379 +    g_option_context_set_description(context,
   1.380 +      "For simplicity, only the switch options which reverse the\n"
   1.381 +      "default configuration are listed. In most cases, both vanilla\n"
   1.382 +      "and \"no-\" prefixed versions are available for use.");
   1.383      if (!g_option_context_parse(context,argc,argv,&err))
   1.384      {
   1.385  	g_printerr("Bookloupe: %s\n",err->message);
   1.386  	g_printerr("Use \"%s --help\" for help\n",(*argv)[0]);
   1.387  	exit(1);
   1.388      }
   1.389 -    /* Paranoid checking is turned OFF, not on, by its switch */
   1.390 -    pswit[PARANOID_SWITCH]=!pswit[PARANOID_SWITCH];
   1.391 -    if (pswit[PARANOID_SWITCH])
   1.392 -	/* if running in paranoid mode, typo checks default to enabled */
   1.393 +    if (typo_compat)
   1.394  	pswit[TYPO_SWITCH]=!pswit[TYPO_SWITCH];
   1.395 -    /* Line-end checking is turned OFF, not on, by its switch */
   1.396 -    pswit[LINE_END_SWITCH]=!pswit[LINE_END_SWITCH];
   1.397 -    /* Echoing is turned OFF, not on, by its switch */
   1.398 -    pswit[ECHO_SWITCH]=!pswit[ECHO_SWITCH];
   1.399 -    if (pswit[OVERVIEW_SWITCH])
   1.400 -	/* just print summary; don't echo */
   1.401 -	pswit[ECHO_SWITCH]=FALSE;
   1.402 +    if (paranoid_compat)
   1.403 +    {
   1.404 +	pswit[PARANOID_SWITCH]=!pswit[PARANOID_SWITCH];
   1.405 +	pswit[TYPO_SWITCH]=!pswit[TYPO_SWITCH];
   1.406 +    }
   1.407      /*
   1.408       * Web uploads - for the moment, this is really just a placeholder
   1.409       * until we decide what processing we really want to do on web uploads
   1.410 @@ -246,6 +558,21 @@
   1.411  	pswit[USERTYPO_SWITCH]=FALSE;
   1.412  	pswit[DP_SWITCH]=FALSE;
   1.413      }
   1.414 +    if (opt_charset && !set_charset(opt_charset,&err))
   1.415 +    {
   1.416 +	g_printerr("%s\n",err->message);
   1.417 +	exit(1);
   1.418 +    }
   1.419 +    if (pswit[DUMP_CONFIG_SWITCH])
   1.420 +    {
   1.421 +	dump_config();
   1.422 +	exit(0);
   1.423 +    }
   1.424 +    g_free(opt_charset);
   1.425 +    opt_charset=NULL;
   1.426 +    if (pswit[OVERVIEW_SWITCH])
   1.427 +	/* just print summary; don't echo */
   1.428 +	pswit[ECHO_SWITCH]=FALSE;
   1.429      if (*argc<2)
   1.430      {
   1.431  	proghelp(context);
   1.432 @@ -305,7 +632,11 @@
   1.433  	exit(1);
   1.434      }
   1.435      if (g_utf8_validate(contents,len,NULL))
   1.436 +    {
   1.437  	utf8=g_utf8_normalize(contents,len,G_NORMALIZE_DEFAULT_COMPOSE);
   1.438 +	if (!charset)
   1.439 +	    (void)set_charset("UNICODE",NULL);
   1.440 +    }
   1.441      else
   1.442  	utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
   1.443      g_free(contents);
   1.444 @@ -388,6 +719,15 @@
   1.445      saved_cp=GetConsoleOutputCP();
   1.446  #endif
   1.447      running_from=g_path_get_dirname(argv[0]);
   1.448 +    /* Paranoid checking is turned OFF, not on, by its switch */
   1.449 +    pswit[PARANOID_SWITCH]=TRUE;
   1.450 +    /* if running in paranoid mode, typo checks default to enabled */
   1.451 +    pswit[TYPO_SWITCH]=TRUE;
   1.452 +    /* Line-end checking is turned OFF, not on, by its switch */
   1.453 +    pswit[LINE_END_SWITCH]=TRUE;
   1.454 +    /* Echoing is turned OFF, not on, by its switch */
   1.455 +    pswit[ECHO_SWITCH]=TRUE;
   1.456 +    parse_config_file();
   1.457      parse_options(&argc,&argv);
   1.458      if (pswit[USERTYPO_SWITCH])
   1.459  	read_user_scannos();
   1.460 @@ -428,6 +768,9 @@
   1.461      g_free(running_from);
   1.462      if (usertypo)
   1.463  	g_tree_unref(usertypo);
   1.464 +    set_charset(NULL,NULL);
   1.465 +    if (config)
   1.466 +	g_key_file_free(config);
   1.467      return 0;
   1.468  }
   1.469  
   1.470 @@ -708,25 +1051,32 @@
   1.471  	  "Not reporting them.\n",
   1.472  	  results->spacedash+results->non_PG_space_emdash);
   1.473      }
   1.474 -    /* If more than a quarter of characters are hi-bit, bug out. */
   1.475 -    warnings.bin=1;
   1.476 -    if (results->binlen*4>results->totlen)
   1.477 +    if (charset)
   1.478 +	warnings.bin=0;
   1.479 +    else
   1.480      {
   1.481 -	g_print("   --> This file does not appear to be ASCII. "
   1.482 -	  "Terminating. Best of luck with it!\n");
   1.483 -	exit(1);
   1.484 -    }
   1.485 -    if (results->alphalen*4<results->totlen)
   1.486 -    {
   1.487 -	g_print("   --> This file does not appear to be text. "
   1.488 -	  "Terminating. Best of luck with it!\n");
   1.489 -	exit(1);
   1.490 -    }
   1.491 -    if (results->binlen*100>results->totlen || results->binlen>100)
   1.492 -    {
   1.493 -	g_print("   --> There are a lot of foreign letters here. "
   1.494 -	  "Not reporting them.\n");
   1.495 -	warnings.bin=0;
   1.496 +	/* Charset ISO_8859-1/ASCII checks for compatibility with gutcheck */
   1.497 +	warnings.bin=1;
   1.498 +	/* If more than a quarter of characters are hi-bit, bug out. */
   1.499 +	if (results->binlen*4>results->totlen)
   1.500 +	{
   1.501 +	    g_print("   --> This file does not appear to be ASCII. "
   1.502 +	      "Terminating. Best of luck with it!\n");
   1.503 +	    exit(1);
   1.504 +	}
   1.505 +	if (results->alphalen*4<results->totlen)
   1.506 +	{
   1.507 +	    g_print("   --> This file does not appear to be text. "
   1.508 +	      "Terminating. Best of luck with it!\n");
   1.509 +	    exit(1);
   1.510 +	}
   1.511 +	if (results->binlen*100>results->totlen || results->binlen>100)
   1.512 +	{
   1.513 +	    g_print("   --> There are a lot of foreign letters here. "
   1.514 +	      "Not reporting them.\n");
   1.515 +	    if (!pswit[VERBOSE_SWITCH])
   1.516 +		warnings.bin=0;
   1.517 +	}
   1.518      }
   1.519      warnings.isDutch=FALSE;
   1.520      if (results->Dutchcount>50)
   1.521 @@ -754,7 +1104,6 @@
   1.522      g_print("\n");
   1.523      if (pswit[VERBOSE_SWITCH])
   1.524      {
   1.525 -	warnings.bin=1;
   1.526  	warnings.shortline=1;
   1.527  	warnings.dotcomma=1;
   1.528  	warnings.longline=1;
   1.529 @@ -949,14 +1298,17 @@
   1.530    gboolean isemptyline)
   1.531  {
   1.532      /* Don't repeat multiple warnings on one line. */
   1.533 -    gboolean eNon_A=FALSE,eTab=FALSE,eTilde=FALSE;
   1.534 +    gboolean eInvalidChar=FALSE,eTab=FALSE,eTilde=FALSE;
   1.535      gboolean eCarat=FALSE,eFSlash=FALSE,eAst=FALSE;
   1.536      const char *s;
   1.537      gunichar c;
   1.538 +    gsize nb;
   1.539 +    gchar *t;
   1.540      for (s=aline;*s;s=g_utf8_next_char(s))
   1.541      {
   1.542  	c=g_utf8_get_char(s);
   1.543 -	if (!eNon_A && (c<CHAR_SPACE && c!='\t' && c!='\n' || c>127))
   1.544 +	if (warnings->bin && !eInvalidChar &&
   1.545 +	  (c<CHAR_SPACE && c!='\t' && c!='\n' || c>127))
   1.546  	{
   1.547  	    if (pswit[ECHO_SWITCH])
   1.548  		g_print("\n%s\n",aline);
   1.549 @@ -971,7 +1323,57 @@
   1.550  		      linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
   1.551  	    else
   1.552  		cnt_bin++;
   1.553 -	    eNon_A=TRUE;
   1.554 +	    eInvalidChar=TRUE;
   1.555 +	}
   1.556 +	if (!eInvalidChar && charset)	/* A charset is in force: validate c against it */
   1.557 +	{
   1.558 +	    if (charset_validator==(GIConv)-1)	/* No converter open: charset is UNICODE */
   1.559 +	    {
   1.560 +		if (!g_unichar_isdefined(c))
   1.561 +		{
   1.562 +		    if (pswit[ECHO_SWITCH])
   1.563 +			g_print("\n%s\n",aline);
   1.564 +		    if (!pswit[OVERVIEW_SWITCH])
   1.565 +			g_print("    Line %ld column %ld - Unassigned UNICODE "
   1.566 +			  "code point U+%04" G_GINT32_MODIFIER "X\n",
   1.567 +			  linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
   1.568 +		    else
   1.569 +			cnt_bin++;
   1.570 +		    eInvalidChar=TRUE;
   1.571 +		}
   1.572 +		else if (c>=0xE000 && c<=0xF8FF || c>=0xF0000 && c<=0xFFFFD ||	/* Private Use Area; plane 15 */
   1.573 +		  c>=0x100000 && c<=0x10FFFD)	/* Plane 16 (hex bound, not decimal 100000) */
   1.574 +		{
   1.575 +		    if (pswit[ECHO_SWITCH])
   1.576 +			g_print("\n%s\n",aline);
   1.577 +		    if (!pswit[OVERVIEW_SWITCH])
   1.578 +			g_print("    Line %ld column %ld - Private Use "
   1.579 +			  "character U+%04" G_GINT32_MODIFIER "X\n",
   1.580 +			  linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
   1.581 +		    else
   1.582 +			cnt_bin++;
   1.583 +		    eInvalidChar=TRUE;
   1.584 +		}
   1.585 +	    }
   1.586 +	    else
   1.587 +	    {
   1.588 +		t=g_convert_with_iconv(s,g_utf8_next_char(s)-s,	/* Convert this one character only */
   1.589 +		  charset_validator,NULL,&nb,NULL);
   1.590 +		if (t)
   1.591 +		    g_free(t);	/* Only success or failure matters, not the result */
   1.592 +		else
   1.593 +		{
   1.594 +		    if (pswit[ECHO_SWITCH])
   1.595 +			g_print("\n%s\n",aline);
   1.596 +		    if (!pswit[OVERVIEW_SWITCH])
   1.597 +			g_print("    Line %ld column %ld - Non-%s "
   1.598 +			  "character %u\n",linecnt,
   1.599 +			  g_utf8_pointer_to_offset(aline,s)+1,charset,c);
   1.600 +		    else
   1.601 +			cnt_bin++;
   1.602 +		    eInvalidChar=TRUE;
   1.603 +		}
   1.604 +	    }
   1.605 +	}
   1.606  	if (!eTab && c==CHAR_TAB)
   1.607  	{
   1.608 @@ -2626,8 +3028,7 @@
   1.609  	if (s>=aline && g_utf8_get_char(s)=='-')
   1.610  	    enddash=TRUE;
   1.611  	check_for_control_characters(aline);
   1.612 -	if (warnings->bin)
   1.613 -	    check_for_odd_characters(aline,warnings,isemptyline);
   1.614 +	check_for_odd_characters(aline,warnings,isemptyline);
   1.615  	if (warnings->longline)
   1.616  	    check_for_long_line(aline);
   1.617  	if (warnings->shortline)