Merge bugs #13+14: charsets / configuration file
authorali <ali@juiblex.co.uk>
Thu Oct 03 23:00:49 2013 +0100 (2013-10-03)
changeset 1548b17763b9bfb
parent 153 76cc493e8f37
parent 152 da598b05f8e8
child 155 1f1d40127177
Merge bugs #13+14: charsets / configuration file
test/harness/loupe-test.c
test/harness/testcase.c
test/harness/testcase.h
     1.1 --- a/Makefile.am	Thu Oct 03 22:59:44 2013 +0100
     1.2 +++ b/Makefile.am	Thu Oct 03 23:00:49 2013 +0100
     1.3 @@ -1,1 +1,3 @@
     1.4  SUBDIRS=bl bookloupe test doc
     1.5 +
     1.6 +dist_pkgdata_DATA=sample.ini
     2.1 --- a/bookloupe/bookloupe.c	Thu Oct 03 22:59:44 2013 +0100
     2.2 +++ b/bookloupe/bookloupe.c	Thu Oct 03 23:00:49 2013 +0100
     2.3 @@ -32,6 +32,9 @@
     2.4  #include "pending.h"
     2.5  #include "HTMLentities.h"
     2.6  
     2.7 +gchar *charset;		/* Or NULL for auto (ISO_8859-1/ASCII or UNICODE) */
     2.8 +GIConv charset_validator=(GIConv)-1;
     2.9 +
    2.10  gchar *prevline;
    2.11  
    2.12  /* Common typos. */
    2.13 @@ -127,36 +130,101 @@
    2.14  }; 
    2.15  
    2.16  gboolean pswit[SWITNO];  /* program switches */
    2.17 +gchar *opt_charset;
    2.18 +
    2.19 +gboolean typo_compat,paranoid_compat;
    2.20  
    2.21  static GOptionEntry options[]={
    2.22      { "dp", 'd', 0, G_OPTION_ARG_NONE, pswit+DP_SWITCH,
    2.23        "Ignore DP-specific markup", NULL },
    2.24 -    { "noecho", 'e', 0, G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
    2.25 +    { "no-dp", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    2.26 +      G_OPTION_ARG_NONE, pswit+DP_SWITCH,
    2.27 +      "Don't ignore DP-specific markup", NULL },
    2.28 +    { "echo", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
    2.29 +      "Echo queried line", NULL },
    2.30 +    { "no-echo", 'e', G_OPTION_FLAG_REVERSE,
    2.31 +      G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
    2.32        "Don't echo queried line", NULL },
    2.33      { "squote", 's', 0, G_OPTION_ARG_NONE, pswit+SQUOTE_SWITCH,
    2.34        "Check single quotes", NULL },
    2.35 -    { "typo", 't', 0, G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
    2.36 +    { "no-squote", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    2.37 +      G_OPTION_ARG_NONE, pswit+SQUOTE_SWITCH,
    2.38 +      "Don't check single quotes", NULL },
    2.39 +    { "typo", 0, 0, G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
    2.40        "Check common typos", NULL },
    2.41 +    { "no-typo", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    2.42 +      G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
    2.43 +      "Don't check common typos", NULL },
    2.44      { "qpara", 'p', 0, G_OPTION_ARG_NONE, pswit+QPARA_SWITCH,
    2.45        "Require closure of quotes on every paragraph", NULL },
    2.46 -    { "relaxed", 'x', 0, G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
    2.47 +    { "no-qpara", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    2.48 +      G_OPTION_ARG_NONE, pswit+QPARA_SWITCH,
    2.49 +      "Don't require closure of quotes on every paragraph", NULL },
    2.50 +    { "paranoid", 0, G_OPTION_FLAG_HIDDEN,
    2.51 +      G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
    2.52 +      "Enable paranoid querying of everything", NULL },
    2.53 +    { "no-paranoid", 0, G_OPTION_FLAG_REVERSE,
    2.54 +      G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
    2.55        "Disable paranoid querying of everything", NULL },
    2.56 -    { "line-end", 'l', 0, G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
    2.57 -      "Disable line end checking", NULL },
    2.58 +    { "line-end", 0, G_OPTION_FLAG_HIDDEN,
    2.59 +      G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
    2.60 +      "Enable line end checking", NULL },
    2.61 +    { "no-line-end", 'l', G_OPTION_FLAG_REVERSE,
    2.62 +      G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
    2.63 +      "Disable line end checking", NULL },
    2.64      { "overview", 'o', 0, G_OPTION_ARG_NONE, pswit+OVERVIEW_SWITCH,
    2.65        "Overview: just show counts", NULL },
    2.66 +    { "no-overview", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    2.67 +      G_OPTION_ARG_NONE, pswit+OVERVIEW_SWITCH,
    2.68 +      "Show individual warnings", NULL },
    2.69      { "stdout", 'y', 0, G_OPTION_ARG_NONE, pswit+STDOUT_SWITCH,
    2.70        "Output errors to stdout instead of stderr", NULL },
    2.71 +    { "no-stdout", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    2.72 +      G_OPTION_ARG_NONE, pswit+STDOUT_SWITCH,
    2.73 +      "Output errors to stderr instead of stdout", NULL },
    2.74      { "header", 'h', 0, G_OPTION_ARG_NONE, pswit+HEADER_SWITCH,
    2.75        "Echo header fields", NULL },
    2.76 +    { "no-header", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    2.77 +      G_OPTION_ARG_NONE, pswit+HEADER_SWITCH,
    2.78 +      "Don't echo header fields", NULL },
    2.79      { "markup", 'm', 0, G_OPTION_ARG_NONE, pswit+MARKUP_SWITCH,
    2.80        "Ignore markup in < >", NULL },
    2.81 +    { "no-markup", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    2.82 +      G_OPTION_ARG_NONE, pswit+MARKUP_SWITCH,
    2.83 +      "No special handling for markup in < >", NULL },
    2.84      { "usertypo", 'u', 0, G_OPTION_ARG_NONE, pswit+USERTYPO_SWITCH,
    2.85        "Use file of user-defined typos", NULL },
    2.86 +    { "no-usertypo", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    2.87 +      G_OPTION_ARG_NONE, pswit+USERTYPO_SWITCH,
    2.88 +      "Ignore file of user-defined typos", NULL },
    2.89 +    { "verbose", 'v', 0, G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
    2.90 +      "Verbose - list everything", NULL },
    2.91 +    { "no-verbose", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    2.92 +      G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
    2.93 +      "Switch off verbose mode", NULL },
    2.94 +    { "charset", 0, 0, G_OPTION_ARG_STRING, &opt_charset,
    2.95 +      "Set of characters valid for this ebook", "NAME" },
    2.96 +    { NULL }
    2.97 +};
    2.98 +
    2.99 +/*
   2.100 + * Options relating to configuration which make no sense from inside
   2.101 + * a configuration file.
   2.102 + */
   2.103 +
   2.104 +static GOptionEntry config_options[]={
   2.105      { "web", 'w', 0, G_OPTION_ARG_NONE, pswit+WEB_SWITCH,
   2.106        "Defaults for use on www upload", NULL },
   2.107 -    { "verbose", 'v', 0, G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
   2.108 -      "Verbose - list everything", NULL },
   2.109 +    { "dump-config", 0, 0, G_OPTION_ARG_NONE, pswit+DUMP_CONFIG_SWITCH,
   2.110 +      "Dump current config settings", NULL },
   2.111 +    { NULL }
   2.112 +};
   2.113 +
   2.114 +static GOptionEntry compatibility_options[]={
   2.115 +    { "toggle-typo", 't', 0, G_OPTION_ARG_NONE, &typo_compat,
   2.116 +      "Toggle checking for common typos", NULL },
   2.117 +    { "toggle-relaxed", 'x', 0, G_OPTION_ARG_NONE, &paranoid_compat,
   2.118 +      "Toggle both paranoid mode and common typos", NULL },
   2.119      { NULL }
   2.120  };
   2.121  
   2.122 @@ -200,31 +268,275 @@
   2.123  UINT saved_cp;
   2.124  #endif
   2.125  
   2.126 +/* Set the declared character set; NULL or "auto" selects auto-detection. */
   2.127 +/* Returns FALSE (and sets err) if iconv cannot convert to the named set. */
   2.128 +gboolean set_charset(const char *name,GError **err)
   2.129 +{
   2.130 +    /* The various UNICODE encodings all share the same character set. */
   2.131 +    const char *unicode_aliases[]={ "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4",
   2.132 +      "UCS-4BE", "UCS-4LE", "UCS2", "UCS4", "UNICODE", "UNICODEBIG",
   2.133 +      "UNICODELITTLE", "UTF-7", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
   2.134 +      "UTF-32", "UTF-32BE", "UTF-32LE", "UTF7", "UTF8", "UTF16", "UTF16BE",
   2.135 +      "UTF16LE", "UTF32", "UTF32BE", "UTF32LE" };
   2.136 +    guint i;
   2.137 +    /* Discard any previous setting (g_free(NULL) is a no-op). */
   2.138 +    g_free(charset);
   2.139 +    if (charset_validator!=(GIConv)-1)
   2.140 +	g_iconv_close(charset_validator);
   2.141 +    charset=NULL;
   2.142 +    charset_validator=(GIConv)-1;
   2.143 +    if (!name || !g_ascii_strcasecmp(name,"auto"))
   2.144 +	return TRUE;
   2.145 +    charset=g_strdup(name);
   2.146 +    for(i=0;i<G_N_ELEMENTS(unicode_aliases);i++)
   2.147 +	if (!g_ascii_strcasecmp(charset,unicode_aliases[i]))
   2.148 +	{
   2.149 +	    g_free(charset);
   2.150 +	    charset=g_strdup("UTF-8");
   2.151 +	    break;
   2.152 +	}
   2.153 +    /* UTF-8 needs no converter; leave charset_validator unset. */
   2.154 +    if (!strcmp(charset,"UTF-8"))
   2.155 +	return TRUE;
   2.156 +    charset_validator=g_iconv_open(charset,"UTF-8");
   2.157 +    if (charset_validator==(GIConv)-1)
   2.158 +    {
   2.159 +	g_set_error(err,G_CONVERT_ERROR,G_CONVERT_ERROR_NO_CONVERSION,
   2.160 +	  "Unknown character set \"%s\"",charset);
   2.161 +	/* Don't keep a name with no validator: that combination means UTF-8. */
   2.162 +	g_free(charset);
   2.163 +	charset=NULL;
   2.164 +	return FALSE;
   2.165 +    }
   2.166 +    return TRUE;
   2.167 +}
   2.168 +
   2.169 +GKeyFile *config;
   2.170 +/* Store the current value of every option in the "options" group of kf. */
   2.171 +void config_file_update(GKeyFile *kf)
   2.172 +{
   2.173 +    int i;
   2.174 +    const char *s;
   2.175 +    gboolean sw;
   2.176 +    for(i=0;options[i].long_name;i++)
   2.177 +    {
   2.178 +	if (g_str_has_prefix(options[i].long_name,"no-"))
   2.179 +	    continue;	/* "no-" entries reuse the plain entry's storage */
   2.180 +	if (options[i].arg==G_OPTION_ARG_NONE)
   2.181 +	{
   2.182 +	    sw=*(gboolean *)options[i].arg_data;
   2.183 +	    if (options[i].flags&G_OPTION_FLAG_REVERSE)
   2.184 +		sw=!sw;
   2.185 +	    g_key_file_set_boolean(kf,"options",options[i].long_name,sw);
   2.186 +	}
   2.187 +	else if (options[i].arg==G_OPTION_ARG_STRING)
   2.188 +	{
   2.189 +	    s=*(gchar **)options[i].arg_data;
   2.190 +	    if (!s)
   2.191 +		s="auto";	/* an unset string option is written as "auto" */
   2.192 +	    g_key_file_set_string(kf,"options",options[i].long_name,s);
   2.193 +	}
   2.194 +	else
   2.195 +	    g_assert_not_reached();	/* only switch and string options exist */
   2.196 +    }
   2.197 +}
   2.198 +/* Add a file header comment and each option's description as its comment. */
   2.199 +void config_file_add_comments(GKeyFile *kf)
   2.200 +{
   2.201 +    int i;
   2.202 +    gchar *comment;
   2.203 +    g_key_file_set_comment(kf,NULL,NULL," Default configuration for bookloupe",
   2.204 +      NULL);
   2.205 +    for(i=0;options[i].long_name;i++)
   2.206 +    {
   2.207 +	if (g_str_has_prefix(options[i].long_name,"no-"))
   2.208 +	    continue;	/* "no-" variants get no comment of their own */
   2.209 +	comment=g_strconcat(" ",options[i].description,NULL);
   2.210 +	g_key_file_set_comment(kf,"options",options[i].long_name,comment,NULL);
   2.211 +	g_free(comment);
   2.212 +    }
   2.213 +}
   2.214 +/* Print the current settings in key-file format using g_print(). */
   2.215 +void dump_config(void)
   2.216 +{
   2.217 +    gchar *s;
   2.218 +    if (config)
   2.219 +	config_file_update(config);
   2.220 +    else
   2.221 +    {
   2.222 +	config=g_key_file_new();	/* no key file yet: build a commented one */
   2.223 +	config_file_update(config);
   2.224 +	config_file_add_comments(config);
   2.225 +    }
   2.226 +    s=g_key_file_to_data(config,NULL,NULL);
   2.227 +    if (s)
   2.228 +	g_print("%s",s);
   2.229 +    g_free(s);
   2.230 +}
   2.231 +/* Load the first bookloupe.ini found on the search path, or return NULL. */
   2.232 +/* On success *full_path (if full_path is non-NULL) is set; caller frees it. */
   2.233 +GKeyFile *read_config_file(gchar **full_path)
   2.234 +{
   2.235 +    int i;
   2.236 +    GError *err=NULL;
   2.237 +    gchar **search_dirs,*path=NULL;	/* NULL: an empty list never sets it */
   2.238 +    const char *search_path;
   2.239 +    GKeyFile *kf;
   2.240 +    kf=g_key_file_new();
   2.241 +    search_path=g_getenv("BOOKLOUPE_CONFIG_PATH");
   2.242 +    if (search_path)
   2.243 +    {
   2.244 +#ifdef __WIN32__
   2.245 +	search_dirs=g_strsplit(search_path,";",0);
   2.246 +#else
   2.247 +	search_dirs=g_strsplit(search_path,":",0);
   2.248 +#endif
   2.249 +    }
   2.250 +    else
   2.251 +    {
   2.252 +	search_dirs=g_new(gchar *,4);
   2.253 +	search_dirs[0]=g_get_current_dir();
   2.254 +	search_dirs[1]=g_strdup(running_from);
   2.255 +	search_dirs[2]=g_strdup(g_get_user_config_dir());
   2.256 +	search_dirs[3]=NULL;
   2.257 +    }
   2.258 +    for(i=0;search_dirs[i];i++)
   2.259 +    {
   2.260 +	path=g_build_filename(search_dirs[i],"bookloupe.ini",NULL);
   2.261 +	if (g_key_file_load_from_file(kf,path,
   2.262 +	  G_KEY_FILE_KEEP_COMMENTS|G_KEY_FILE_KEEP_TRANSLATIONS,&err))
   2.263 +	    break;
   2.264 +	if (!g_error_matches(err,G_FILE_ERROR,G_FILE_ERROR_NOENT))
   2.265 +	{
   2.266 +	    g_printerr("Bookloupe: Error reading %s\n",path);
   2.267 +	    g_printerr("%s\n",err->message);
   2.268 +	    exit(1);
   2.269 +	}
   2.270 +	g_clear_error(&err);
   2.271 +	g_free(path);
   2.272 +	path=NULL;
   2.273 +    }
   2.274 +    if (!search_dirs[i])	/* ran off the end: nothing was loaded */
   2.275 +    {
   2.276 +	g_key_file_free(kf);
   2.277 +	kf=NULL;
   2.278 +    }
   2.279 +    g_strfreev(search_dirs);
   2.280 +    if (full_path && kf)
   2.281 +	*full_path=path;
   2.282 +    else
   2.283 +	g_free(path);
   2.284 +    return kf;
   2.285 +}
   2.286 +/* Read bookloupe.ini (if found) and apply its "options" group to options[]. */
   2.287 +void parse_config_file(void)
   2.288 +{
   2.289 +    int i,j;
   2.290 +    gchar *path,*s;
   2.291 +    gchar **keys;
   2.292 +    gboolean sw;
   2.293 +    GError *err=NULL;
   2.294 +    config=read_config_file(&path);
   2.295 +    if (config)
   2.296 +	keys=g_key_file_get_keys(config,"options",NULL,NULL);
   2.297 +    else
   2.298 +	keys=NULL;
   2.299 +    if (keys)
   2.300 +    {
   2.301 +	for(i=0;keys[i];i++)
   2.302 +	{
   2.303 +	    for(j=0;options[j].long_name;j++)
   2.304 +	    {
   2.305 +		if (g_str_has_prefix(options[j].long_name,"no-"))
   2.306 +		    continue;	/* "no-" names are not accepted as keys */
   2.307 +		else if (!strcmp(keys[i],options[j].long_name))
   2.308 +		{
   2.309 +		    if (options[j].arg==G_OPTION_ARG_NONE)
   2.310 +		    {
   2.311 +			sw=g_key_file_get_boolean(config,"options",keys[i],
   2.312 +			  &err);
   2.313 +			if (err)
   2.314 +			{
   2.315 +			    g_printerr("Bookloupe: %s: options.%s: %s\n",
   2.316 +			      path,keys[i],err->message);
   2.317 +			    g_clear_error(&err);
   2.318 +			}
   2.319 +			else
   2.320 +			{
   2.321 +			    if (options[j].flags&G_OPTION_FLAG_REVERSE)
   2.322 +				sw=!sw;
   2.323 +			    *(gboolean *)options[j].arg_data=sw;
   2.324 +			}
   2.325 +			break;	/* key matched; stop scanning options[] */
   2.326 +		    }
   2.327 +		    else if (options[j].arg==G_OPTION_ARG_STRING)
   2.328 +		    {
   2.329 +			s=g_key_file_get_string(config,"options",keys[i],
   2.330 +			  &err);
   2.331 +			if (err)
   2.332 +			{
   2.333 +			    g_printerr("Bookloupe: %s: options.%s: %s\n",
   2.334 +			      path,keys[i],err->message);
   2.335 +			    g_clear_error(&err);
   2.336 +			}
   2.337 +			else
   2.338 +			{
   2.339 +			    g_free(*(gchar **)options[j].arg_data);
   2.340 +			    if (!g_strcmp0(s,"auto"))
   2.341 +			    {
   2.342 +				*(gchar **)options[j].arg_data=NULL;
   2.343 +				g_free(s);
   2.344 +			    }
   2.345 +			    else
   2.346 +				*(gchar **)options[j].arg_data=s;
   2.347 +			}
   2.348 +			break;	/* key matched; stop scanning options[] */
   2.349 +		    }
   2.350 +		    else
   2.351 +			g_assert_not_reached();
   2.352 +		}
   2.353 +	    }
   2.354 +	    if (!options[j].long_name)	/* scan reached the end: no match */
   2.355 +		g_printerr("Bookloupe: %s: Unknown option \"%s\" ignored\n",
   2.356 +		  path,keys[i]);
   2.357 +	}
   2.358 +	g_strfreev(keys);
   2.359 +    }
   2.360 +    if (config)	/* path is only set when a file was loaded */
   2.361 +	g_free(path);
   2.362 +}
   2.363 +
   2.364  void parse_options(int *argc,char ***argv)
   2.365  {
   2.366      GError *err=NULL;
   2.367      GOptionContext *context;
   2.368 +    GOptionGroup *compatibility;
   2.369      context=g_option_context_new(
   2.370 -      "file - looks for errors in Project Gutenberg(TM) etexts");
   2.371 +      "file - look for errors in Project Gutenberg(TM) etexts");
   2.372      g_option_context_add_main_entries(context,options,NULL);
   2.373 +    g_option_context_add_main_entries(context,config_options,NULL);
   2.374 +    compatibility=g_option_group_new("compatibility",
   2.375 +      "Options for Compatibility with Gutcheck:",
   2.376 +      "Show compatibility options",NULL,NULL);
   2.377 +    g_option_group_add_entries(compatibility,compatibility_options);
   2.378 +    g_option_context_add_group(context,compatibility);
   2.379 +    g_option_context_set_description(context,
   2.380 +      "For simplicity, only the switch options which reverse the\n"
   2.381 +      "default configuration are listed. In most cases, both vanilla\n"
   2.382 +      "and \"no-\" prefixed versions are available for use.");
   2.383      if (!g_option_context_parse(context,argc,argv,&err))
   2.384      {
   2.385  	g_printerr("Bookloupe: %s\n",err->message);
   2.386  	g_printerr("Use \"%s --help\" for help\n",(*argv)[0]);
   2.387  	exit(1);
   2.388      }
   2.389 -    /* Paranoid checking is turned OFF, not on, by its switch */
   2.390 -    pswit[PARANOID_SWITCH]=!pswit[PARANOID_SWITCH];
   2.391 -    if (pswit[PARANOID_SWITCH])
   2.392 -	/* if running in paranoid mode, typo checks default to enabled */
   2.393 +    if (typo_compat)
   2.394  	pswit[TYPO_SWITCH]=!pswit[TYPO_SWITCH];
   2.395 -    /* Line-end checking is turned OFF, not on, by its switch */
   2.396 -    pswit[LINE_END_SWITCH]=!pswit[LINE_END_SWITCH];
   2.397 -    /* Echoing is turned OFF, not on, by its switch */
   2.398 -    pswit[ECHO_SWITCH]=!pswit[ECHO_SWITCH];
   2.399 -    if (pswit[OVERVIEW_SWITCH])
   2.400 -	/* just print summary; don't echo */
   2.401 -	pswit[ECHO_SWITCH]=FALSE;
   2.402 +    if (paranoid_compat)
   2.403 +    {
   2.404 +	pswit[PARANOID_SWITCH]=!pswit[PARANOID_SWITCH];
   2.405 +	pswit[TYPO_SWITCH]=!pswit[TYPO_SWITCH];
   2.406 +    }
   2.407      /*
   2.408       * Web uploads - for the moment, this is really just a placeholder
   2.409       * until we decide what processing we really want to do on web uploads
   2.410 @@ -246,6 +558,21 @@
   2.411  	pswit[USERTYPO_SWITCH]=FALSE;
   2.412  	pswit[DP_SWITCH]=FALSE;
   2.413      }
   2.414 +    if (opt_charset && !set_charset(opt_charset,&err))
   2.415 +    {
   2.416 +	g_printerr("%s\n",err->message);
   2.417 +	exit(1);
   2.418 +    }
   2.419 +    if (pswit[DUMP_CONFIG_SWITCH])
   2.420 +    {
   2.421 +	dump_config();
   2.422 +	exit(0);
   2.423 +    }
   2.424 +    g_free(opt_charset);
   2.425 +    opt_charset=NULL;
   2.426 +    if (pswit[OVERVIEW_SWITCH])
   2.427 +	/* just print summary; don't echo */
   2.428 +	pswit[ECHO_SWITCH]=FALSE;
   2.429      if (*argc<2)
   2.430      {
   2.431  	proghelp(context);
   2.432 @@ -305,7 +632,11 @@
   2.433  	exit(1);
   2.434      }
   2.435      if (g_utf8_validate(contents,len,NULL))
   2.436 +    {
   2.437  	utf8=g_utf8_normalize(contents,len,G_NORMALIZE_DEFAULT_COMPOSE);
   2.438 +	if (!charset)
   2.439 +	    (void)set_charset("UNICODE",NULL);
   2.440 +    }
   2.441      else
   2.442  	utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
   2.443      g_free(contents);
   2.444 @@ -388,6 +719,15 @@
   2.445      saved_cp=GetConsoleOutputCP();
   2.446  #endif
   2.447      running_from=g_path_get_dirname(argv[0]);
   2.448 +    /* Paranoid checking is turned OFF, not on, by its switch */
   2.449 +    pswit[PARANOID_SWITCH]=TRUE;
   2.450 +    /* if running in paranoid mode, typo checks default to enabled */
   2.451 +    pswit[TYPO_SWITCH]=TRUE;
   2.452 +    /* Line-end checking is turned OFF, not on, by its switch */
   2.453 +    pswit[LINE_END_SWITCH]=TRUE;
   2.454 +    /* Echoing is turned OFF, not on, by its switch */
   2.455 +    pswit[ECHO_SWITCH]=TRUE;
   2.456 +    parse_config_file();
   2.457      parse_options(&argc,&argv);
   2.458      if (pswit[USERTYPO_SWITCH])
   2.459  	read_user_scannos();
   2.460 @@ -428,6 +768,9 @@
   2.461      g_free(running_from);
   2.462      if (usertypo)
   2.463  	g_tree_unref(usertypo);
   2.464 +    set_charset(NULL,NULL);
   2.465 +    if (config)
   2.466 +	g_key_file_free(config);
   2.467      return 0;
   2.468  }
   2.469  
   2.470 @@ -708,25 +1051,32 @@
   2.471  	  "Not reporting them.\n",
   2.472  	  results->spacedash+results->non_PG_space_emdash);
   2.473      }
   2.474 -    /* If more than a quarter of characters are hi-bit, bug out. */
   2.475 -    warnings.bin=1;
   2.476 -    if (results->binlen*4>results->totlen)
   2.477 +    if (charset)
   2.478 +	warnings.bin=0;
   2.479 +    else
   2.480      {
   2.481 -	g_print("   --> This file does not appear to be ASCII. "
   2.482 -	  "Terminating. Best of luck with it!\n");
   2.483 -	exit(1);
   2.484 -    }
   2.485 -    if (results->alphalen*4<results->totlen)
   2.486 -    {
   2.487 -	g_print("   --> This file does not appear to be text. "
   2.488 -	  "Terminating. Best of luck with it!\n");
   2.489 -	exit(1);
   2.490 -    }
   2.491 -    if (results->binlen*100>results->totlen || results->binlen>100)
   2.492 -    {
   2.493 -	g_print("   --> There are a lot of foreign letters here. "
   2.494 -	  "Not reporting them.\n");
   2.495 -	warnings.bin=0;
   2.496 +	/* Charset ISO_8859-1/ASCII checks for compatibility with gutcheck */
   2.497 +	warnings.bin=1;
   2.498 +	/* If more than a quarter of characters are hi-bit, bug out. */
   2.499 +	if (results->binlen*4>results->totlen)
   2.500 +	{
   2.501 +	    g_print("   --> This file does not appear to be ASCII. "
   2.502 +	      "Terminating. Best of luck with it!\n");
   2.503 +	    exit(1);
   2.504 +	}
   2.505 +	if (results->alphalen*4<results->totlen)
   2.506 +	{
   2.507 +	    g_print("   --> This file does not appear to be text. "
   2.508 +	      "Terminating. Best of luck with it!\n");
   2.509 +	    exit(1);
   2.510 +	}
   2.511 +	if (results->binlen*100>results->totlen || results->binlen>100)
   2.512 +	{
   2.513 +	    g_print("   --> There are a lot of foreign letters here. "
   2.514 +	      "Not reporting them.\n");
   2.515 +	    if (!pswit[VERBOSE_SWITCH])
   2.516 +		warnings.bin=0;
   2.517 +	}
   2.518      }
   2.519      warnings.isDutch=FALSE;
   2.520      if (results->Dutchcount>50)
   2.521 @@ -754,7 +1104,6 @@
   2.522      g_print("\n");
   2.523      if (pswit[VERBOSE_SWITCH])
   2.524      {
   2.525 -	warnings.bin=1;
   2.526  	warnings.shortline=1;
   2.527  	warnings.dotcomma=1;
   2.528  	warnings.longline=1;
   2.529 @@ -949,14 +1298,17 @@
   2.530    gboolean isemptyline)
   2.531  {
   2.532      /* Don't repeat multiple warnings on one line. */
   2.533 -    gboolean eNon_A=FALSE,eTab=FALSE,eTilde=FALSE;
   2.534 +    gboolean eInvalidChar=FALSE,eTab=FALSE,eTilde=FALSE;
   2.535      gboolean eCarat=FALSE,eFSlash=FALSE,eAst=FALSE;
   2.536      const char *s;
   2.537      gunichar c;
   2.538 +    gsize nb;
   2.539 +    gchar *t;
   2.540      for (s=aline;*s;s=g_utf8_next_char(s))
   2.541      {
   2.542  	c=g_utf8_get_char(s);
   2.543 -	if (!eNon_A && (c<CHAR_SPACE && c!='\t' && c!='\n' || c>127))
   2.544 +	if (warnings->bin && !eInvalidChar &&
   2.545 +	  (c<CHAR_SPACE && c!='\t' && c!='\n' || c>127))
   2.546  	{
   2.547  	    if (pswit[ECHO_SWITCH])
   2.548  		g_print("\n%s\n",aline);
   2.549 @@ -971,7 +1323,57 @@
   2.550  		      linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
   2.551  	    else
   2.552  		cnt_bin++;
   2.553 -	    eNon_A=TRUE;
   2.554 +	    eInvalidChar=TRUE;
   2.555 +	}
   2.556 +	if (!eInvalidChar && charset)
   2.557 +	{
   2.558 +	    if (charset_validator==(GIConv)-1)
   2.559 +	    {
   2.560 +		if (!g_unichar_isdefined(c))
   2.561 +		{
   2.562 +		    if (pswit[ECHO_SWITCH])
   2.563 +			g_print("\n%s\n",aline);
   2.564 +		    if (!pswit[OVERVIEW_SWITCH])
   2.565 +			g_print("    Line %ld column %ld - Unassigned UNICODE "
   2.566 +			  "code point U+%04" G_GINT32_MODIFIER "X\n",
   2.567 +			  linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
   2.568 +		    else
   2.569 +			cnt_bin++;
   2.570 +		    eInvalidChar=TRUE;
   2.571 +		}
   2.572 +		else if (c>=0xE000 && c<=0xF8FF || c>=0xF0000 && c<=0xFFFFD ||
   2.573 +		  c>=0x100000 && c<=0x10FFFD)	/* plane 16 PUA starts at U+100000 */
   2.574 +		{
   2.575 +		    if (pswit[ECHO_SWITCH])
   2.576 +			g_print("\n%s\n",aline);
   2.577 +		    if (!pswit[OVERVIEW_SWITCH])
   2.578 +			g_print("    Line %ld column %ld - Private Use "
   2.579 +			  "character U+%04" G_GINT32_MODIFIER "X\n",
   2.580 +			  linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
   2.581 +		    else
   2.582 +			cnt_bin++;
   2.583 +		    eInvalidChar=TRUE;
   2.584 +		}
   2.585 +	    }
   2.586 +	    else
   2.587 +	    {
   2.588 +		t=g_convert_with_iconv(s,g_utf8_next_char(s)-s,
   2.589 +		  charset_validator,NULL,&nb,NULL);
   2.590 +		if (t)
   2.591 +		    g_free(t);
   2.592 +		else
   2.593 +		{
   2.594 +		    if (pswit[ECHO_SWITCH])
   2.595 +			g_print("\n%s\n",aline);
   2.596 +		    if (!pswit[OVERVIEW_SWITCH])
   2.597 +			g_print("    Line %ld column %ld - Non-%s "
   2.598 +			  "character %u\n",linecnt,
   2.599 +			  g_utf8_pointer_to_offset(aline,s)+1,charset,c);
   2.600 +		    else
   2.601 +			cnt_bin++;
   2.602 +		    eInvalidChar=TRUE;
   2.603 +		}
   2.604 +	    }
   2.605  	}
   2.606  	if (!eTab && c==CHAR_TAB)
   2.607  	{
   2.608 @@ -2626,8 +3028,7 @@
   2.609  	if (s>=aline && g_utf8_get_char(s)=='-')
   2.610  	    enddash=TRUE;
   2.611  	check_for_control_characters(aline);
   2.612 -	if (warnings->bin)
   2.613 -	    check_for_odd_characters(aline,warnings,isemptyline);
   2.614 +	check_for_odd_characters(aline,warnings,isemptyline);
   2.615  	if (warnings->longline)
   2.616  	    check_for_long_line(aline);
   2.617  	if (warnings->shortline)
     3.1 --- a/bookloupe/bookloupe.h	Thu Oct 03 22:59:44 2013 +0100
     3.2 +++ b/bookloupe/bookloupe.h	Thu Oct 03 23:00:49 2013 +0100
     3.3 @@ -55,6 +55,7 @@
     3.4      MARKUP_SWITCH,
     3.5      USERTYPO_SWITCH,
     3.6      DP_SWITCH,
     3.7 +    DUMP_CONFIG_SWITCH,
     3.8      SWITNO
     3.9  };
    3.10  
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/sample.ini	Thu Oct 03 23:00:49 2013 +0100
     4.3 @@ -0,0 +1,33 @@
     4.4 +# Default configuration for bookloupe
     4.5 +
     4.6 +[options]
     4.7 +# Ignore DP-specific markup
     4.8 +dp=false
     4.9 +# Echo queried line
    4.10 +echo=true
    4.11 +# Check single quotes
    4.12 +squote=false
    4.13 +# Check common typos
    4.14 +typo=true
    4.15 +# Require closure of quotes on every paragraph
    4.16 +qpara=false
    4.17 +# Enable paranoid querying of everything
    4.18 +paranoid=true
    4.19 +# Enable line end checking
    4.20 +line-end=true
    4.21 +# Overview: just show counts
    4.22 +overview=false
    4.23 +# Output errors to stdout instead of stderr
    4.24 +stdout=false
    4.25 +# Echo header fields
    4.26 +header=false
    4.27 +# Ignore markup in < >
    4.28 +markup=false
    4.29 +# Use file of user-defined typos
    4.30 +usertypo=false
    4.31 +# Defaults for use on www upload: command-line only (--web), not read here
    4.32 +# web=false
    4.33 +# Verbose - list everything
    4.34 +verbose=false
    4.35 +# Set of characters valid for this ebook
    4.36 +charset=auto
     5.1 --- a/test/bookloupe/Makefile.am	Thu Oct 03 22:59:44 2013 +0100
     5.2 +++ b/test/bookloupe/Makefile.am	Thu Oct 03 23:00:49 2013 +0100
     5.3 @@ -1,5 +1,7 @@
     5.4  TESTS_ENVIRONMENT=BOOKLOUPE=../../bookloupe/bookloupe ../harness/loupe-test
     5.5  TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst curved-quotes.tst \
     5.6 -	runfox-quotes.tst curved-genitives.tst multi-line-illustration.tst
     5.7 +	runfox-quotes.tst curved-genitives.tst multi-line-illustration.tst \
     5.8 +	config-internal.tst config-default.tst config-user.tst \
     5.9 +	config-override.tst charset-cp1252.tst charset-latin1.tst
    5.10  
    5.11  dist_pkgdata_DATA=$(TESTS)
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/test/bookloupe/charset-cp1252.tst	Thu Oct 03 23:00:49 2013 +0100
     6.3 @@ -0,0 +1,16 @@
     6.4 +**************** OPTIONS ****************
     6.5 +--charset=WINDOWS-1252
     6.6 +**************** ENCODING ****************
     6.7 +WINDOWS-1252
     6.8 +**************** INPUT ****************
     6.9 +Unless binary mode is engaged, gutcheck will warn about a number of
    6.10 +characters defined in Windows-1252. Bookloupe provides support for
    6.11 +disabling such checks without concern as to the file size and how
    6.12 +many characters with the eighth bit set it may contain by allowing a
    6.13 +character set to be declared. With the character set declared as
    6.14 +WINDOWS-1252, all characters defined in Windows-1252 shoud be acceptable
    6.15 +and no warnings should be issued.
    6.16 +
    6.17 +We test for this by including just one such character—the em dash.
    6.18 +
    6.19 +**************** EXPECTED ****************
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/test/bookloupe/charset-latin1.tst	Thu Oct 03 23:00:49 2013 +0100
     7.3 @@ -0,0 +1,58 @@
     7.4 +**************** OPTIONS ****************
     7.5 +--charset=ISO-8859-1
     7.6 +**************** ENCODING ****************
     7.7 +WINDOWS-1252
     7.8 +**************** INPUT ****************
     7.9 +Where the character set declared is narrower than the character set
    7.10 +implied by the encoding as in this case (Windows-1252 is a superset
    7.11 +of the first latin alphabet defined in ECMA 94), then bookloupe should
    7.12 +warn about characters that are not in the declared character set but
    7.13 +should still recognise them and otherwise handle them as it would
    7.14 +normally do. We use the curved apostrophe as a test for this since
    7.15 +if bookloupe didn't recognise it then it would query the orphaned
    7.16 +letters from the genitives and abbreviations.
    7.17 +
    7.18 +John Hendricks was bear-leading at the time. He had originally studied
    7.19 +for Holy Orders, but had abandoned the Church later for private reasons
    7.20 +connected with his faith, and had taken to teaching and tutoring
    7.21 +instead. He was an honest, upstanding fellow of five-and-thirty,
    7.22 +incorruptible, intelligent in a simple, straightforward way. He played
    7.23 +games with his head, more than most Englishmen do, but he went through
    7.24 +life without much calculation. He had qualities that made boys like
    7.25 +and respect him; he won their confidence. Poor, proud, ambitious,
    7.26 +he realised that fate offered him a chance when the Secretary of
    7.27 +State for Scotland asked him if he would give up his other pupils
    7.28 +for a year and take his son, Lord Ernie, round the world upon an
    7.29 +educational trip that might make a man of him. For Lord Ernie was the
    7.30 +only son, and the Marquess’s influence was naturally great. To have
    7.31 +deposited a regenerated Lord Ernie at the castle gates might have
    7.32 +guaranteed Hendricks’ future. After leaving Eton prematurely the lad
    7.33 +had come under Hendricks’ charge for a time, and with such excellent
    7.34 +results--‘I’d simply swear by that chap, you know,’ the boy used
    7.35 +to say--that his father, considerably impressed, and rather as a
    7.36 +last resort, had made this proposition. And Hendricks, without much
    7.37 +calculation, had accepted it. He liked ‘Bindy’ for himself. It was
    7.38 +in his heart to ‘make a man of him,’ if possible. They had now been
    7.39 +round the world together and had come up from Brindisi to the Italian
    7.40 +Lakes, and so into Switzerland. It was middle October. With a week or
    7.41 +two to spare they were making leisurely for the ancestral halls in
    7.42 +Aberdeenshire.
    7.43 +**************** EXPECTED ****************
    7.44 +
    7.45 +only son, and the Marquess’s influence was naturally great. To have
    7.46 +    Line 22 column 27 - Non-ISO-8859-1 character 8217
    7.47 +
    7.48 +guaranteed Hendricks’ future. After leaving Eton prematurely the lad
    7.49 +    Line 24 column 21 - Non-ISO-8859-1 character 8217
    7.50 +
    7.51 +had come under Hendricks’ charge for a time, and with such excellent
    7.52 +    Line 25 column 25 - Non-ISO-8859-1 character 8217
    7.53 +
    7.54 +results--‘I’d simply swear by that chap, you know,’ the boy used
    7.55 +    Line 26 column 10 - Non-ISO-8859-1 character 8216
    7.56 +
    7.57 +calculation, had accepted it. He liked ‘Bindy’ for himself. It was
    7.58 +    Line 29 column 40 - Non-ISO-8859-1 character 8216
    7.59 +
    7.60 +in his heart to ‘make a man of him,’ if possible. They had now been
    7.61 +    Line 30 column 17 - Non-ISO-8859-1 character 8216
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/test/bookloupe/config-default.tst	Thu Oct 03 23:00:49 2013 +0100
     8.3 @@ -0,0 +1,66 @@
     8.4 +**************** OPTIONS ****************
     8.5 +--dump-config
     8.6 +**************** INPUT(bookloupe.ini) ****************
     8.7 +# Default configuration for bookloupe
     8.8 +
     8.9 +[options]
    8.10 +# Ignore DP-specific markup
    8.11 +dp=false
    8.12 +# Echo queried line
    8.13 +echo=true
    8.14 +# Check single quotes
    8.15 +squote=false
    8.16 +# Check common typos
    8.17 +typo=true
    8.18 +# Require closure of quotes on every paragraph
    8.19 +qpara=false
    8.20 +# Enable paranoid querying of everything
    8.21 +paranoid=true
    8.22 +# Enable line end checking
    8.23 +line-end=true
    8.24 +# Overview: just show counts
    8.25 +overview=false
    8.26 +# Output errors to stdout instead of stderr
    8.27 +stdout=false
    8.28 +# Echo header fields
    8.29 +header=false
    8.30 +# Ignore markup in < >
    8.31 +markup=false
    8.32 +# Use file of user-defined typos
    8.33 +usertypo=false
    8.34 +# Verbose - list everything
    8.35 +verbose=false
    8.36 +# Set of characters valid for this ebook
    8.37 +charset=auto
    8.38 +**************** EXPECTED(stdout) ****************
    8.39 +# Default configuration for bookloupe
    8.40 +
    8.41 +[options]
    8.42 +# Ignore DP-specific markup
    8.43 +dp=false
    8.44 +# Echo queried line
    8.45 +echo=true
    8.46 +# Check single quotes
    8.47 +squote=false
    8.48 +# Check common typos
    8.49 +typo=true
    8.50 +# Require closure of quotes on every paragraph
    8.51 +qpara=false
    8.52 +# Enable paranoid querying of everything
    8.53 +paranoid=true
    8.54 +# Enable line end checking
    8.55 +line-end=true
    8.56 +# Overview: just show counts
    8.57 +overview=false
    8.58 +# Output errors to stdout instead of stderr
    8.59 +stdout=false
    8.60 +# Echo header fields
    8.61 +header=false
    8.62 +# Ignore markup in < >
    8.63 +markup=false
    8.64 +# Use file of user-defined typos
    8.65 +usertypo=false
    8.66 +# Verbose - list everything
    8.67 +verbose=false
    8.68 +# Set of characters valid for this ebook
    8.69 +charset=auto
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/test/bookloupe/config-internal.tst	Thu Oct 03 23:00:49 2013 +0100
     9.3 @@ -0,0 +1,34 @@
     9.4 +**************** OPTIONS ****************
     9.5 +--dump-config
     9.6 +**************** EXPECTED(stdout) ****************
     9.7 +# Default configuration for bookloupe
     9.8 +
     9.9 +[options]
    9.10 +# Ignore DP-specific markup
    9.11 +dp=false
    9.12 +# Echo queried line
    9.13 +echo=true
    9.14 +# Check single quotes
    9.15 +squote=false
    9.16 +# Check common typos
    9.17 +typo=true
    9.18 +# Require closure of quotes on every paragraph
    9.19 +qpara=false
    9.20 +# Enable paranoid querying of everything
    9.21 +paranoid=true
    9.22 +# Enable line end checking
    9.23 +line-end=true
    9.24 +# Overview: just show counts
    9.25 +overview=false
    9.26 +# Output errors to stdout instead of stderr
    9.27 +stdout=false
    9.28 +# Echo header fields
    9.29 +header=false
    9.30 +# Ignore markup in < >
    9.31 +markup=false
    9.32 +# Use file of user-defined typos
    9.33 +usertypo=false
    9.34 +# Verbose - list everything
    9.35 +verbose=false
    9.36 +# Set of characters valid for this ebook
    9.37 +charset=auto
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/test/bookloupe/config-override.tst	Thu Oct 03 23:00:49 2013 +0100
    10.3 @@ -0,0 +1,68 @@
    10.4 +**************** OPTIONS ****************
    10.5 +--usertypo
    10.6 +--charset=auto
    10.7 +--dump-config
    10.8 +**************** INPUT(bookloupe.ini) ****************
    10.9 +# Relaxed configuration for bookloupe
   10.10 +
   10.11 +[options]
   10.12 +# Ignore DP-specific markup
   10.13 +dp=false
   10.14 +# Echo queried line
   10.15 +echo=true
   10.16 +# Check single quotes
   10.17 +squote=false
   10.18 +# Check common typos
   10.19 +typo=true
   10.20 +# Require closure of quotes on every paragraph
   10.21 +qpara=false
   10.22 +# Enable paranoid querying of everything
   10.23 +paranoid=false
   10.24 +# Enable line end checking
   10.25 +line-end=true
   10.26 +# Overview: just show counts
   10.27 +overview=false
   10.28 +# Output errors to stdout instead of stderr
   10.29 +stdout=false
   10.30 +# Echo header fields
   10.31 +header=false
   10.32 +# Ignore markup in < >
   10.33 +markup=false
   10.34 +# Use file of user-defined typos
   10.35 +usertypo=false
   10.36 +# Verbose - list everything
   10.37 +verbose=false
   10.38 +# Set of characters valid for this ebook
   10.39 +charset=UNICODE
   10.40 +**************** EXPECTED(stdout) ****************
   10.41 +# Relaxed configuration for bookloupe
   10.42 +
   10.43 +[options]
   10.44 +# Ignore DP-specific markup
   10.45 +dp=false
   10.46 +# Echo queried line
   10.47 +echo=true
   10.48 +# Check single quotes
   10.49 +squote=false
   10.50 +# Check common typos
   10.51 +typo=true
   10.52 +# Require closure of quotes on every paragraph
   10.53 +qpara=false
   10.54 +# Enable paranoid querying of everything
   10.55 +paranoid=false
   10.56 +# Enable line end checking
   10.57 +line-end=true
   10.58 +# Overview: just show counts
   10.59 +overview=false
   10.60 +# Output errors to stdout instead of stderr
   10.61 +stdout=false
   10.62 +# Echo header fields
   10.63 +header=false
   10.64 +# Ignore markup in < >
   10.65 +markup=false
   10.66 +# Use file of user-defined typos
   10.67 +usertypo=true
   10.68 +# Verbose - list everything
   10.69 +verbose=false
   10.70 +# Set of characters valid for this ebook
   10.71 +charset=auto
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/test/bookloupe/config-user.tst	Thu Oct 03 23:00:49 2013 +0100
    11.3 @@ -0,0 +1,76 @@
    11.4 +**************** OPTIONS ****************
    11.5 +--dump-config
    11.6 +**************** INPUT(bookloupe.ini) ****************
    11.7 +# Mary Contrary's configuration for bookloupe
    11.8 +
    11.9 +# Bookloupe will ignore this group, but it's nice to have.
   11.10 +[other]
   11.11 +# Look at me!
   11.12 +name="Mary Contrary"
   11.13 +
   11.14 +[options]
   11.15 +# Ignore DP-specific markup - sounds useful
   11.16 +dp=true
   11.17 +# Echo queried line - what's the point of that?
   11.18 +echo=false
   11.19 +# Check single quotes - yup
   11.20 +squote=true
   11.21 +# Check common typos - waste of time
   11.22 +typo=false
   11.23 +# Require closure of quotes on every paragraph - okay
   11.24 +qpara=true
   11.25 +# Enable paranoid querying of everything - Huh?
   11.26 +paranoid=false
   11.27 +# Enable line end checking - pointless
   11.28 +line-end=false
   11.29 +# Overview: just show counts - Brief is good
   11.30 +overview=true
   11.31 +# Output errors to stdout instead of stderr - keeps things together
   11.32 +stdout=true
   11.33 +# Echo header fields - I'd rather see it
   11.34 +header=true
   11.35 +# Ignore markup in < > - Need this
   11.36 +markup=true
   11.37 +# Use file of user-defined typos - And this
   11.38 +usertypo=true
   11.39 +# Verbose - list everything - Contrary by name...
   11.40 +verbose=true
   11.41 +# Set of characters valid for this ebook - Let's stick with Latin1
   11.42 +charset=ISO-8859-1
   11.43 +**************** EXPECTED(stdout) ****************
   11.44 +# Mary Contrary's configuration for bookloupe
   11.45 +
   11.46 +# Bookloupe will ignore this group, but it's nice to have.
   11.47 +[other]
   11.48 +# Look at me!
   11.49 +name="Mary Contrary"
   11.50 +
   11.51 +[options]
   11.52 +# Ignore DP-specific markup - sounds useful
   11.53 +dp=true
   11.54 +# Echo queried line - what's the point of that?
   11.55 +echo=false
   11.56 +# Check single quotes - yup
   11.57 +squote=true
   11.58 +# Check common typos - waste of time
   11.59 +typo=false
   11.60 +# Require closure of quotes on every paragraph - okay
   11.61 +qpara=true
   11.62 +# Enable paranoid querying of everything - Huh?
   11.63 +paranoid=false
   11.64 +# Enable line end checking - pointless
   11.65 +line-end=false
   11.66 +# Overview: just show counts - Brief is good
   11.67 +overview=true
   11.68 +# Output errors to stdout instead of stderr - keeps things together
   11.69 +stdout=true
   11.70 +# Echo header fields - I'd rather see it
   11.71 +header=true
   11.72 +# Ignore markup in < > - Need this
   11.73 +markup=true
   11.74 +# Use file of user-defined typos - And this
   11.75 +usertypo=true
   11.76 +# Verbose - list everything - Contrary by name...
   11.77 +verbose=true
   11.78 +# Set of characters valid for this ebook - Let's stick with Latin1
   11.79 +charset=ISO-8859-1
    12.1 --- a/test/compatibility/Makefile.am	Thu Oct 03 22:59:44 2013 +0100
    12.2 +++ b/test/compatibility/Makefile.am	Thu Oct 03 23:00:49 2013 +0100
    12.3 @@ -6,6 +6,7 @@
    12.4  	user-defined-typo.tst brackets.tst single-quotes.tst grave-quotes.tst \
    12.5  	dashes.tst control-characters.tst unusual-characters.tst \
    12.6  	windows-1252.tst periods.tst long-line.tst unmarked-paragraph.tst \
    12.7 +	paranoid.tst paranoid-typos.tst no-paranoid.tst no-paranoid-typos.tst \
    12.8  	hebe-jeebies.tst mail-from.tst scannos.tst before-comma.tst \
    12.9  	before-period.tst double-punctuation.tst genitives.tst embedded-cr.tst \
   12.10  	continuing-quotes.tst
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/test/compatibility/no-paranoid-typos.tst	Thu Oct 03 23:00:49 2013 +0100
    13.3 @@ -0,0 +1,12 @@
    13.4 +**************** OPTIONS ****************
    13.5 +-x
    13.6 +-t
    13.7 +**************** INPUT ****************
    13.8 +In paranoid mode we check for a standalone digits. 1 think this is a useful
    13.9 +feature. When checking for typos every, strangly placed comma is reported.
   13.10 +
   13.11 +If paranoid mode is switched off, we can still check for typos.
   13.12 +**************** EXPECTED ****************
   13.13 +
   13.14 +feature. When checking for typos every, strangly placed comma is reported.
   13.15 +    Line 2 column 39 - Query punctuation after every?
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/test/compatibility/no-paranoid.tst	Thu Oct 03 23:00:49 2013 +0100
    14.3 @@ -0,0 +1,8 @@
    14.4 +**************** OPTIONS ****************
    14.5 +-x
    14.6 +**************** INPUT ****************
    14.7 +In paranoid mode we check for a standalone digits. 1 think this is a useful
    14.8 +feature. When checking for typos every, strangly placed comma is reported.
    14.9 +
   14.10 +If paranoid mode is switched off, checking for typos defaults to off too.
   14.11 +**************** EXPECTED ****************
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/test/compatibility/paranoid-typos.tst	Thu Oct 03 23:00:49 2013 +0100
    15.3 @@ -0,0 +1,12 @@
    15.4 +**************** OPTIONS ****************
    15.5 +-t
    15.6 +**************** INPUT ****************
    15.7 +In paranoid mode we check for a standalone digits. 1 think this is a useful
    15.8 +feature. When checking for typos every, strangly placed comma is reported.
    15.9 +
   15.10 +In paranoid mode (the default), typo checking is switched off with its
   15.11 +short option.
   15.12 +**************** EXPECTED ****************
   15.13 +
   15.14 +In paranoid mode we check for a standalone digits. 1 think this is a useful
   15.15 +    Line 1 column 51 - Query standalone 1
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/test/compatibility/paranoid.tst	Thu Oct 03 23:00:49 2013 +0100
    16.3 @@ -0,0 +1,12 @@
    16.4 +**************** INPUT ****************
    16.5 +In paranoid mode we check for a standalone digits. 1 think this is a useful
    16.6 +feature. When checking for typos every, strangly placed comma is reported.
    16.7 +
    16.8 +By default, both paranoid mode and checking for typos should be on.
    16.9 +**************** EXPECTED ****************
   16.10 +
   16.11 +In paranoid mode we check for a standalone digits. 1 think this is a useful
   16.12 +    Line 1 column 51 - Query standalone 1
   16.13 +
   16.14 +feature. When checking for typos every, strangly placed comma is reported.
   16.15 +    Line 2 column 39 - Query punctuation after every?
    17.1 --- a/test/harness/Makefile.am	Thu Oct 03 22:59:44 2013 +0100
    17.2 +++ b/test/harness/Makefile.am	Thu Oct 03 23:00:49 2013 +0100
    17.3 @@ -5,5 +5,6 @@
    17.4  
    17.5  loupe_test_SOURCES=loupe-test.c testcase.c testcase.h testcaseio.c \
    17.6  	testcaseio.h testcaseparser.c testcaseparser.h testcaseinput.c \
    17.7 -	testcaseinput.h warningsparser.c warningsparser.h
    17.8 +	testcaseinput.h testcaseoutput.c testcaseoutput.h warningsparser.c \
    17.9 +	warningsparser.h
   17.10  loupe_test_LDADD=../../bl/libbl.la
    18.1 --- a/test/harness/loupe-test.c	Thu Oct 03 22:59:44 2013 +0100
    18.2 +++ b/test/harness/loupe-test.c	Thu Oct 03 23:00:49 2013 +0100
    18.3 @@ -48,6 +48,7 @@
    18.4  	exit(1);
    18.5      }
    18.6      bl_set_print_handlers();
    18.7 +    g_setenv("BOOKLOUPE_CONFIG_PATH",".",TRUE);
    18.8      for(i=1;i<argc;i++)
    18.9  	pass&=run_test(argv[i]);
   18.10      return pass?0:1;
    19.1 --- a/test/harness/testcase.c	Thu Oct 03 22:59:44 2013 +0100
    19.2 +++ b/test/harness/testcase.c	Thu Oct 03 23:00:49 2013 +0100
    19.3 @@ -7,6 +7,7 @@
    19.4  #include <bl/bl.h>
    19.5  #include "testcase.h"
    19.6  #include "testcaseinput.h"
    19.7 +#include "testcaseoutput.h"
    19.8  
    19.9  GQuark testcase_error_quark(void)
   19.10  {
   19.11 @@ -171,6 +172,64 @@
   19.12      return g_string_free(filename,FALSE);
   19.13  }
   19.14  
   19.15 +/*
   19.16 + * Verify that all the output files specified by a testcase are present
   19.17 + * with the expected contents. 
   19.18 + */
   19.19 +gboolean testcase_verify_output_files(Testcase *testcase)
   19.20 +{
   19.21 +    GSList *link;
   19.22 +    GError *tmp_err=NULL;
   19.23 +    gboolean retval=TRUE;
   19.24 +    ssize_t offset;
   19.25 +    gchar *contents;
   19.26 +    TestcaseOutput *output;
   19.27 +    for(link=testcase->outputs;link;link=link->next)
   19.28 +    {
   19.29 +	output=link->data;
   19.30 +	if (!testcase_output_read(testcase,output,&contents,NULL,&tmp_err))
   19.31 +	{
   19.32 +	    g_print("%s: FAIL\n",testcase->basename);
   19.33 +	    g_print("%s\n",tmp_err->message);
   19.34 +	    g_clear_error(&tmp_err);
   19.35 +	    retval=FALSE;
   19.36 +	    break;
   19.37 +	}
   19.38 +	else
   19.39 +	{
   19.40 +	    if (strcmp(contents,output->contents))
   19.41 +	    {
   19.42 +		g_print("%s: FAIL\n",testcase->basename);
   19.43 +		offset=common_prefix_length(contents,output->contents);
   19.44 +		if (!offset && !contents[offset])
   19.45 +		    g_print("%s: Unexpected empty output from bookloupe.\n",
   19.46 +		      output->name);
   19.47 +		else
   19.48 +		{
   19.49 +		    g_print("%s: Unexpected output from bookloupe:\n",
   19.50 +		      output->name);
   19.51 +		    print_unexpected(contents,offset);
   19.52 +		}
   19.53 +		retval=FALSE;
   19.54 +	    }
   19.55 +	    g_free(contents);
   19.56 +	    break;
   19.57 +	}
   19.58 +    }
   19.59 +    for(link=testcase->outputs;link;link=link->next)
   19.60 +	if (!testcase_output_remove(testcase,link->data,&tmp_err))
   19.61 +	{
   19.62 +	    if (retval)
   19.63 +	    {
   19.64 +		g_print("%s: FAIL\n",testcase->basename);
   19.65 +		g_print("%s\n",tmp_err->message);
   19.66 +		retval=TRUE;
   19.67 +	    }
   19.68 +	    g_clear_error(&tmp_err);
   19.69 +	}
   19.70 +    return retval;
   19.71 +}
   19.72 +
   19.73  gboolean testcase_spawn_bookloupe(Testcase *testcase,char **standard_output,
   19.74    GError **error)
   19.75  {
   19.76 @@ -460,7 +519,7 @@
   19.77      gboolean r;
   19.78      size_t pos,offset;
   19.79      GString *header;
   19.80 -    char *output,*filename,*s,*xfail=NULL;
   19.81 +    char *filename,*s,*xfail=NULL;
   19.82      GError *error=NULL;
   19.83      if (!testcase_create_input_files(testcase,&error))
   19.84      {
   19.85 @@ -469,7 +528,7 @@
   19.86  	g_error_free(error);
   19.87  	return FALSE;
   19.88      }
   19.89 -    r=testcase_spawn_bookloupe(testcase,&output,&error);
   19.90 +    r=testcase_spawn_bookloupe(testcase,&testcase->test_output,&error);
   19.91      if (!r)
   19.92      {
   19.93  	g_print("%s: FAIL\n",testcase->basename);
   19.94 @@ -486,35 +545,40 @@
   19.95  	g_error_free(error);
   19.96  	return FALSE;
   19.97      }
   19.98 -    header=g_string_new("\n\nFile: ");
   19.99 -    g_string_append(header,filename);
  19.100 -    g_string_append(header,"\n");
  19.101 -    if (!g_str_has_prefix(output,header->str))
  19.102 +    if (testcase->expected || testcase->warnings)
  19.103      {
  19.104 -	g_print("%s: FAIL\n",testcase->basename);
  19.105 -	g_print("Unexpected header from bookloupe:\n");
  19.106 -	offset=common_prefix_length(output,header->str);
  19.107 -	print_unexpected(output,offset);
  19.108 -	r=FALSE;
  19.109 -    }
  19.110 -    pos=header->len;
  19.111 -    if (r)
  19.112 -    {
  19.113 -	/* Skip the summary */
  19.114 -	s=strstr(output+pos,"\n\n");
  19.115 -	if (s)
  19.116 -	    pos=s-output+2;
  19.117 -	else
  19.118 +	header=g_string_new("\n\nFile: ");
  19.119 +	g_string_append(header,filename);
  19.120 +	g_string_append(header,"\n");
  19.121 +	if (!g_str_has_prefix(testcase->test_output,header->str))
  19.122  	{
  19.123  	    g_print("%s: FAIL\n",testcase->basename);
  19.124 -	    g_print("Unterminated summary from bookloupe:\n%s\n",output+pos);
  19.125 +	    g_print("Unexpected header from bookloupe:\n");
  19.126 +	    offset=common_prefix_length(testcase->test_output,header->str);
  19.127 +	    print_unexpected(testcase->test_output,offset);
  19.128  	    r=FALSE;
  19.129  	}
  19.130 +	pos=header->len;
  19.131 +	if (r)
  19.132 +	{
  19.133 +	    /* Skip the summary */
  19.134 +	    s=strstr(testcase->test_output+pos,"\n\n");
  19.135 +	    if (s)
  19.136 +		pos=s-testcase->test_output+2;
  19.137 +	    else
  19.138 +	    {
  19.139 +		g_print("%s: FAIL\n",testcase->basename);
  19.140 +		g_print("Unterminated summary from bookloupe:\n%s\n",
  19.141 +		  testcase->test_output+pos);
  19.142 +		r=FALSE;
  19.143 +	    }
  19.144 +	}
  19.145 +	g_string_free(header,TRUE);
  19.146 +	r=testcase_check_warnings(testcase,testcase->test_output+pos,&xfail);
  19.147      }
  19.148 -    g_string_free(header,TRUE);
  19.149 -    r=testcase_check_warnings(testcase,output+pos,&xfail);
  19.150 +    if (!testcase_verify_output_files(testcase))
  19.151 +	r=FALSE;
  19.152      g_free(filename);
  19.153 -    g_free(output);
  19.154      if (r)
  19.155      {
  19.156  	if (xfail)
  19.157 @@ -575,5 +639,6 @@
  19.158      g_slist_free(testcase->warnings);
  19.159      g_free(testcase->encoding);
  19.160      g_strfreev(testcase->options);
  19.161 +    g_free(testcase->test_output);
  19.162      g_free(testcase);
  19.163  }
    20.1 --- a/test/harness/testcase.h	Thu Oct 03 22:59:44 2013 +0100
    20.2 +++ b/test/harness/testcase.h	Thu Oct 03 23:00:49 2013 +0100
    20.3 @@ -37,10 +37,12 @@
    20.4      char *basename;
    20.5      char *tmpdir;
    20.6      GSList *inputs;
    20.7 +    GSList *outputs;
    20.8      char *expected;
    20.9      GSList *warnings;
   20.10      char *encoding;	/* The character encoding to talk to BOOKLOUPE in */
   20.11      char **options;
   20.12 +    char *test_output;
   20.13      enum {
   20.14  	TESTCASE_XFAIL=1<<0,
   20.15  	TESTCASE_TMP_DIR=1<<1,
    21.1 --- a/test/harness/testcaseio.c	Thu Oct 03 22:59:44 2013 +0100
    21.2 +++ b/test/harness/testcaseio.c	Thu Oct 03 23:00:49 2013 +0100
    21.3 @@ -5,6 +5,7 @@
    21.4  #include <bl/bl.h>
    21.5  #include "testcaseparser.h"
    21.6  #include "testcaseinput.h"
    21.7 +#include "testcaseoutput.h"
    21.8  #include "testcaseio.h"
    21.9  #include "warningsparser.h"
   21.10  
   21.11 @@ -70,6 +71,25 @@
   21.12  	else if (!testcase->expected && !testcase->warnings &&
   21.13  	  !strcmp(tag,"EXPECTED"))
   21.14  	    testcase->expected=g_strdup(text);
   21.15 +	else if (g_str_has_prefix(tag,"EXPECTED(") && tag[strlen(tag)-1]==')')
   21.16 +	{
   21.17 +	    arg=g_strndup(tag+9,strlen(tag)-10);
   21.18 +	    s=g_path_get_dirname(arg);
   21.19 +	    if (strcmp(s,"."))
   21.20 +	    {
   21.21 +		g_printerr("%s: Expected files may not have a "
   21.22 +		  "directory component\n",arg);
   21.23 +		g_free(s);
   21.24 +		g_free(arg);
   21.25 +		testcase_free(testcase);
   21.26 +		testcase_parser_free(parser);
   21.27 +		return NULL;
   21.28 +	    }
   21.29 +	    g_free(s);
   21.30 +	    testcase->outputs=g_slist_prepend(testcase->outputs,
   21.31 +	      testcase_output_new(arg,text));
   21.32 +	    g_free(arg);
   21.33 +	}
   21.34  	else if (!testcase->expected && !testcase->warnings &&
   21.35  	  !strcmp(tag,"WARNINGS"))
   21.36  	{
   21.37 @@ -88,11 +108,14 @@
   21.38  	}
   21.39  	else if (!testcase->encoding && !strcmp(tag,"ENCODING"))
   21.40  	    testcase->encoding=g_strchomp(g_strdup(text));
   21.41 -	else if (!testcase->encoding && !strcmp(tag,"OPTIONS"))
   21.42 +	else if (!testcase->options && !strcmp(tag,"OPTIONS"))
   21.43  	{
   21.44  	    testcase->options=g_strsplit(text,"\n",0);
   21.45 -	    g_free(testcase->options[g_strv_length(testcase->options)-1]);
   21.46 -	    testcase->options[g_strv_length(testcase->options)-1]=NULL;
   21.47 +	    if (testcase->options && g_strv_length(testcase->options)>0)
   21.48 +	    {
   21.49 +		g_free(testcase->options[g_strv_length(testcase->options)-1]);
   21.50 +		testcase->options[g_strv_length(testcase->options)-1]=NULL;
   21.51 +	    }
   21.52  	}
   21.53  	else
   21.54  	{
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/test/harness/testcaseoutput.c	Thu Oct 03 23:00:49 2013 +0100
    22.3 @@ -0,0 +1,140 @@
    22.4 +#include <stdlib.h>
    22.5 +#include <string.h>
    22.6 +#include <errno.h>
    22.7 +#include <glib.h>
    22.8 +#include <bl/bl.h>
    22.9 +#include "testcase.h"
   22.10 +#include "testcaseoutput.h"
   22.11 +
   22.12 +/*
   22.13 + * Replace \r\n with \n, \n with U+240A (visible symbol for LF)
   22.14 + * and \r with U+240D (visible symbol for CR).
   22.15 + */
   22.16 +static char *dos2unix(const char *text)
   22.17 +{
   22.18 +    gunichar c;
   22.19 +    gboolean cr=FALSE;
   22.20 +    const gunichar visible_lf=0x240A;
   22.21 +    const gunichar visible_cr=0x240D;
   22.22 +    GString *string;
   22.23 +    string=g_string_new(NULL);
   22.24 +    while(*text)
   22.25 +    {
   22.26 +	c=g_utf8_get_char(text);
   22.27 +	text=g_utf8_next_char(text);
   22.28 +	if (cr)
   22.29 +	{
   22.30 +	    cr=FALSE;
   22.31 +	    if (c=='\n')
   22.32 +	    {
   22.33 +		g_string_append_c(string,'\n');
   22.34 +		continue;
   22.35 +	    }
   22.36 +	    else
   22.37 +		g_string_append_unichar(string,visible_cr);
   22.38 +	}
   22.39 +	if (c=='\r')
   22.40 +	    cr=TRUE;
   22.41 +	else if (c=='\n')
   22.42 +	    g_string_append_unichar(string,visible_lf);
   22.43 +	else
   22.44 +	    g_string_append_unichar(string,c);
   22.45 +    }
   22.46 +    if (cr)
   22.47 +	g_string_append_unichar(string,visible_cr);
   22.48 +    return g_string_free(string,FALSE);
   22.49 +}
   22.50 +
   22.51 +/*
   22.52 + * Read an output file needed for a testcase (as specified in <output>).
   22.53 + * The file is read in the encoding specified for communicating with
   22.54 + * bookloupe.
   22.55 + */
   22.56 +gboolean testcase_output_read(Testcase *testcase,TestcaseOutput *output,
   22.57 +  gchar **contents,gsize *length,GError **error)
   22.58 +{
   22.59 +    char *filename,*s,*t;
   22.60 +    gboolean retval;
   22.61 +    GError *tmp_err=NULL;
   22.62 +    if (!strcmp(output->name,"stdout"))
   22.63 +    {
   22.64 +	*contents=g_strdup(testcase->test_output);
   22.65 +	if (length)
   22.66 +	    *length=strlen(testcase->test_output);
   22.67 +    }
   22.68 +    else
   22.69 +    {
   22.70 +	if (testcase->tmpdir)
   22.71 +	    filename=g_build_filename(testcase->tmpdir,output->name,NULL);
   22.72 +	else
   22.73 +	    filename=g_strdup(output->name);
   22.74 +	if (!g_file_get_contents(filename,&s,NULL,error))
   22.75 +	{
   22.76 +	    g_free(filename);
   22.77 +	    return FALSE;
   22.78 +	}
   22.79 +	g_free(filename);
   22.80 +	if (testcase->encoding)
   22.81 +	{
   22.82 +	    t=dos2unix(s);
   22.83 +	    g_free(s);
   22.84 +	    s=g_convert(t,-1,"UTF-8",testcase->encoding,NULL,length,&tmp_err);
   22.85 +	    g_free(t);
   22.86 +	    if (!s)
   22.87 +	    {
   22.88 +		g_propagate_prefixed_error(error,tmp_err,
   22.89 +		  "Conversion from %s failed: ",testcase->encoding);
   22.90 +		return FALSE;
   22.91 +	    }
   22.92 +	    *contents=s;
   22.93 +	}
   22.94 +	else
   22.95 +	{
   22.96 +	    *contents=dos2unix(s);
   22.97 +	    if (length)
   22.98 +		*length=strlen(*contents);
   22.99 +	}
  22.100 +    }
  22.101 +    return TRUE;
  22.102 +}
  22.103 +
  22.104 +/*
  22.105 + * Remove an output file created by program under test.
  22.106 + */
  22.107 +gboolean testcase_output_remove(Testcase *testcase,TestcaseOutput *output,
  22.108 +  GError **error)
  22.109 +{
  22.110 +    char *filename;
  22.111 +    if (!strcmp(output->name,"stdout"))
  22.112 +	return TRUE;
  22.113 +    if (testcase->tmpdir)
  22.114 +	filename=g_build_filename(testcase->tmpdir,output->name,NULL);
  22.115 +    else
  22.116 +	filename=g_strdup(output->name);
  22.117 +    if (g_unlink(filename)<0)
  22.118 +    {
  22.119 +	g_set_error(error,G_FILE_ERROR,g_file_error_from_errno(errno),
  22.120 +	  "%s: %s",filename,g_strerror(errno));
  22.121 +	return FALSE;
  22.122 +    }
  22.123 +    g_free(filename);
  22.124 +    return TRUE;
  22.125 +}
  22.126 +
  22.127 +/* Create a new description of an output file expected by a testcase */
  22.128 +TestcaseOutput *testcase_output_new(const char *name,const char *contents)
  22.129 +{
  22.130 +    TestcaseOutput *output;
  22.131 +    output=g_new0(TestcaseOutput,1);
  22.132 +    output->name=g_strdup(name);
  22.133 +    output->contents=g_strdup(contents);
  22.134 +    return output;
  22.135 +}
  22.136 +
  22.137 +/* Free the description of a testcase output file */
  22.138 +void testcase_output_free(TestcaseOutput *output)
  22.139 +{
  22.140 +    g_free(output->name);
  22.141 +    g_free(output->contents);
  22.142 +    g_free(output);
  22.143 +}
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/test/harness/testcaseoutput.h	Thu Oct 03 23:00:49 2013 +0100
    23.3 @@ -0,0 +1,19 @@
    23.4 +#ifndef TESTCASE_OUTPUT_H
    23.5 +#define TESTCASE_OUTPUT_H
    23.6 +
    23.7 +#include <glib.h>
    23.8 +#include "testcase.h"
    23.9 +
    23.10 +typedef struct {
    23.11 +    char *name;		/* File name, or "stdout" for standard output */
    23.12 +    char *contents;	/* Expected contents */
    23.13 +} TestcaseOutput;
   23.14 +
   23.15 +gboolean testcase_output_read(Testcase *testcase,TestcaseOutput *output,
   23.16 +  gchar **contents,gsize *length,GError **error);
   23.17 +gboolean testcase_output_remove(Testcase *testcase,TestcaseOutput *output,
   23.18 +  GError **error);
   23.19 +TestcaseOutput *testcase_output_new(const char *name,const char *contents);
   23.20 +void testcase_output_free(TestcaseOutput *output);
   23.21 +
   23.22 +#endif	/* TESTCASE_OUTPUT_H */