1.1 --- a/Makefile.am Thu Oct 03 22:59:44 2013 +0100
1.2 +++ b/Makefile.am Thu Oct 03 23:00:49 2013 +0100
1.3 @@ -1,1 +1,3 @@
1.4 SUBDIRS=bl bookloupe test doc
1.5 +
1.6 +dist_pkgdata_DATA=sample.ini
2.1 --- a/bookloupe/bookloupe.c Thu Oct 03 22:59:44 2013 +0100
2.2 +++ b/bookloupe/bookloupe.c Thu Oct 03 23:00:49 2013 +0100
2.3 @@ -32,6 +32,9 @@
2.4 #include "pending.h"
2.5 #include "HTMLentities.h"
2.6
2.7 +gchar *charset; /* Or NULL for auto (ISO_8859-1/ASCII or UNICODE) */
2.8 +GIConv charset_validator=(GIConv)-1;
2.9 +
2.10 gchar *prevline;
2.11
2.12 /* Common typos. */
2.13 @@ -127,36 +130,101 @@
2.14 };
2.15
2.16 gboolean pswit[SWITNO]; /* program switches */
2.17 +gchar *opt_charset;
2.18 +
2.19 +gboolean typo_compat,paranoid_compat;
2.20
2.21 static GOptionEntry options[]={
2.22 { "dp", 'd', 0, G_OPTION_ARG_NONE, pswit+DP_SWITCH,
2.23 "Ignore DP-specific markup", NULL },
2.24 - { "noecho", 'e', 0, G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
2.25 + { "no-dp", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
2.26 + G_OPTION_ARG_NONE, pswit+DP_SWITCH,
2.27 + "Don't ignore DP-specific markup", NULL },
2.28 + { "echo", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
2.29 + "Echo queried line", NULL },
2.30 + { "no-echo", 'e', G_OPTION_FLAG_REVERSE,
2.31 + G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
2.32 "Don't echo queried line", NULL },
2.33 { "squote", 's', 0, G_OPTION_ARG_NONE, pswit+SQUOTE_SWITCH,
2.34 "Check single quotes", NULL },
2.35 - { "typo", 't', 0, G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
2.36 + { "no-squote", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
2.37 + G_OPTION_ARG_NONE, pswit+SQUOTE_SWITCH,
2.38 + "Don't check single quotes", NULL },
2.39 + { "typo", 0, 0, G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
2.40 "Check common typos", NULL },
2.41 + { "no-typo", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
2.42 + G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
2.43 + "Don't check common typos", NULL },
2.44 { "qpara", 'p', 0, G_OPTION_ARG_NONE, pswit+QPARA_SWITCH,
2.45 "Require closure of quotes on every paragraph", NULL },
2.46 - { "relaxed", 'x', 0, G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
2.47 + { "no-qpara", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
2.48 + G_OPTION_ARG_NONE, pswit+QPARA_SWITCH,
2.49 + "Don't require closure of quotes on every paragraph", NULL },
2.50 + { "paranoid", 0, G_OPTION_FLAG_HIDDEN,
2.51 + G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
2.52 + "Enable paranoid querying of everything", NULL },
2.53 + { "no-paranoid", 0, G_OPTION_FLAG_REVERSE,
2.54 + G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
2.55 "Disable paranoid querying of everything", NULL },
2.56 - { "line-end", 'l', 0, G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
2.57 - "Disable line end checking", NULL },
2.58 + { "line-end", 0, G_OPTION_FLAG_HIDDEN,
2.59 + G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
2.60 + "Enable line end checking", NULL },
2.61 + { "no-line-end", 'l', G_OPTION_FLAG_REVERSE,
2.62 + G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
2.63 + "Disable line end checking", NULL },
2.64 { "overview", 'o', 0, G_OPTION_ARG_NONE, pswit+OVERVIEW_SWITCH,
2.65 "Overview: just show counts", NULL },
2.66 + { "no-overview", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
2.67 + G_OPTION_ARG_NONE, pswit+OVERVIEW_SWITCH,
2.68 + "Show individual warnings", NULL },
2.69 { "stdout", 'y', 0, G_OPTION_ARG_NONE, pswit+STDOUT_SWITCH,
2.70 "Output errors to stdout instead of stderr", NULL },
2.71 + { "no-stdout", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
2.72 + G_OPTION_ARG_NONE, pswit+STDOUT_SWITCH,
2.73 + "Output errors to stderr instead of stdout", NULL },
2.74 { "header", 'h', 0, G_OPTION_ARG_NONE, pswit+HEADER_SWITCH,
2.75 "Echo header fields", NULL },
2.76 + { "no-header", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
2.77 + G_OPTION_ARG_NONE, pswit+HEADER_SWITCH,
2.78 + "Don't echo header fields", NULL },
2.79 { "markup", 'm', 0, G_OPTION_ARG_NONE, pswit+MARKUP_SWITCH,
2.80 "Ignore markup in < >", NULL },
2.81 + { "no-markup", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
2.82 + G_OPTION_ARG_NONE, pswit+MARKUP_SWITCH,
2.83 + "No special handling for markup in < >", NULL },
2.84 { "usertypo", 'u', 0, G_OPTION_ARG_NONE, pswit+USERTYPO_SWITCH,
2.85 "Use file of user-defined typos", NULL },
2.86 + { "no-usertypo", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
2.87 + G_OPTION_ARG_NONE, pswit+USERTYPO_SWITCH,
2.88 + "Ignore file of user-defined typos", NULL },
2.89 + { "verbose", 'v', 0, G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
2.90 + "Verbose - list everything", NULL },
2.91 + { "no-verbose", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
2.92 + G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
2.93 + "Switch off verbose mode", NULL },
2.94 + { "charset", 0, 0, G_OPTION_ARG_STRING, &opt_charset,
2.95 + "Set of characters valid for this ebook", "NAME" },
2.96 + { NULL }
2.97 +};
2.98 +
2.99 +/*
2.100 + * Options relating to configuration which make no sense from inside
2.101 + * a configuration file.
2.102 + */
2.103 +
2.104 +static GOptionEntry config_options[]={
2.105 { "web", 'w', 0, G_OPTION_ARG_NONE, pswit+WEB_SWITCH,
2.106 "Defaults for use on www upload", NULL },
2.107 - { "verbose", 'v', 0, G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
2.108 - "Verbose - list everything", NULL },
2.109 + { "dump-config", 0, 0, G_OPTION_ARG_NONE, pswit+DUMP_CONFIG_SWITCH,
2.110 + "Dump current config settings", NULL },
2.111 + { NULL }
2.112 +};
2.113 +
2.114 +static GOptionEntry compatibility_options[]={
2.115 + { "toggle-typo", 't', 0, G_OPTION_ARG_NONE, &typo_compat,
2.116 + "Toggle checking for common typos", NULL },
2.117 + { "toggle-relaxed", 'x', 0, G_OPTION_ARG_NONE, &paranoid_compat,
2.118 + "Toggle both paranoid mode and common typos", NULL },
2.119 { NULL }
2.120 };
2.121
2.122 @@ -200,31 +268,275 @@
2.123 UINT saved_cp;
2.124 #endif
2.125
2.126 +gboolean set_charset(const char *name,GError **err)
2.127 +{
2.128 + /* The various UNICODE encodings all share the same character set. */
2.129 + const char *unicode_aliases[]={ "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4",
2.130 + "UCS-4BE", "UCS-4LE", "UCS2", "UCS4", "UNICODE", "UNICODEBIG",
2.131 + "UNICODELITTLE", "UTF-7", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
2.132 + "UTF-32", "UTF-32BE", "UTF-32LE", "UTF7", "UTF8", "UTF16", "UTF16BE",
2.133 + "UTF16LE", "UTF32", "UTF32BE", "UTF32LE" };
2.134 + int i;
2.135 + if (charset)
2.136 + g_free(charset);
2.137 + if (charset_validator!=(GIConv)-1)
2.138 + g_iconv_close(charset_validator);
2.139 + if (!name || !g_strcasecmp(name,"auto"))
2.140 + {
2.141 + charset=NULL;
2.142 + charset_validator=(GIConv)-1;
2.143 + return TRUE;
2.144 + }
2.145 + else
2.146 + charset=g_strdup(name);
2.147 + for(i=0;i<G_N_ELEMENTS(unicode_aliases);i++)
2.148 + if (!g_strcasecmp(charset,unicode_aliases[i]))
2.149 + {
2.150 + g_free(charset);
2.151 + charset=g_strdup("UTF-8");
2.152 + break;
2.153 + }
2.154 + if (!strcmp(charset,"UTF-8"))
2.155 + charset_validator=(GIConv)-1;
2.156 + else
2.157 + {
2.158 + charset_validator=g_iconv_open(charset,"UTF-8");
2.159 + if (charset_validator==(GIConv)-1)
2.160 + {
2.161 + g_set_error(err,G_CONVERT_ERROR,G_CONVERT_ERROR_NO_CONVERSION,
2.162 + "Unknown character set \"%s\"",charset);
2.163 + return FALSE;
2.164 + }
2.165 + }
2.166 + return TRUE;
2.167 +}
2.168 +
2.169 +GKeyFile *config;
2.170 +
2.171 +void config_file_update(GKeyFile *kf)
2.172 +{
2.173 + int i;
2.174 + const char *s;
2.175 + gboolean sw;
2.176 + for(i=0;options[i].long_name;i++)
2.177 + {
2.178 + if (g_str_has_prefix(options[i].long_name,"no-"))
2.179 + continue;
2.180 + if (options[i].arg==G_OPTION_ARG_NONE)
2.181 + {
2.182 + sw=*(gboolean *)options[i].arg_data;
2.183 + if (options[i].flags&G_OPTION_FLAG_REVERSE)
2.184 + sw=!sw;
2.185 + g_key_file_set_boolean(kf,"options",options[i].long_name,sw);
2.186 + }
2.187 + else if (options[i].arg==G_OPTION_ARG_STRING)
2.188 + {
2.189 + s=*(gchar **)options[i].arg_data;
2.190 + if (!s)
2.191 + s="auto";
2.192 + g_key_file_set_string(kf,"options",options[i].long_name,s);
2.193 + }
2.194 + else
2.195 + g_assert_not_reached();
2.196 + }
2.197 +}
2.198 +
2.199 +void config_file_add_comments(GKeyFile *kf)
2.200 +{
2.201 + int i;
2.202 + gchar *comment;
2.203 + g_key_file_set_comment(kf,NULL,NULL," Default configuration for bookloupe",
2.204 + NULL);
2.205 + for(i=0;options[i].long_name;i++)
2.206 + {
2.207 + if (g_str_has_prefix(options[i].long_name,"no-"))
2.208 + continue;
2.209 + comment=g_strconcat(" ",options[i].description,NULL);
2.210 + g_key_file_set_comment(kf,"options",options[i].long_name,comment,NULL);
2.211 + g_free(comment);
2.212 + }
2.213 +}
2.214 +
2.215 +void dump_config(void)
2.216 +{
2.217 + gchar *s;
2.218 + if (config)
2.219 + config_file_update(config);
2.220 + else
2.221 + {
2.222 + config=g_key_file_new();
2.223 + config_file_update(config);
2.224 + config_file_add_comments(config);
2.225 + }
2.226 + s=g_key_file_to_data(config,NULL,NULL);
2.227 + if (s)
2.228 + g_print("%s",s);
2.229 + g_free(s);
2.230 +}
2.231 +
2.232 +GKeyFile *read_config_file(gchar **full_path)
2.233 +{
2.234 + int i;
2.235 + GError *err=NULL;
2.236 + gchar **search_dirs;
2.237 + gchar *path;
2.238 + const char *search_path;
2.239 + GKeyFile *kf;
2.240 + kf=g_key_file_new();
2.241 + search_path=g_getenv("BOOKLOUPE_CONFIG_PATH");
2.242 + if (search_path)
2.243 + {
2.244 +#ifdef __WIN32__
2.245 + search_dirs=g_strsplit(search_path,";",0);
2.246 +#else
2.247 + search_dirs=g_strsplit(search_path,":",0);
2.248 +#endif
2.249 + }
2.250 + else
2.251 + {
2.252 + search_dirs=g_new(gchar *,4);
2.253 + search_dirs[0]=g_get_current_dir();
2.254 + search_dirs[1]=g_strdup(running_from);
2.255 + search_dirs[2]=g_strdup(g_get_user_config_dir());
2.256 + search_dirs[3]=NULL;
2.257 + }
2.258 + for(i=0;search_dirs[i];i++)
2.259 + {
2.260 + path=g_build_filename(search_dirs[i],"bookloupe.ini",NULL);
2.261 + if (g_key_file_load_from_file(kf,path,
2.262 + G_KEY_FILE_KEEP_COMMENTS|G_KEY_FILE_KEEP_TRANSLATIONS,&err))
2.263 + break;
2.264 + if (!g_error_matches(err,G_FILE_ERROR,G_FILE_ERROR_NOENT))
2.265 + {
2.266 + g_printerr("Bookloupe: Error reading %s\n",path);
2.267 + g_printerr("%s\n",err->message);
2.268 + exit(1);
2.269 + }
2.270 + g_clear_error(&err);
2.271 + g_free(path);
2.272 + path=NULL;
2.273 + }
2.274 + if (!search_dirs[i])
2.275 + {
2.276 + g_key_file_free(kf);
2.277 + kf=NULL;
2.278 + }
2.279 + g_strfreev(search_dirs);
2.280 + if (full_path && kf)
2.281 + *full_path=path;
2.282 + else
2.283 + g_free(path);
2.284 + return kf;
2.285 +}
2.286 +
2.287 +void parse_config_file(void)
2.288 +{
2.289 + int i,j;
2.290 + gchar *path,*s;
2.291 + gchar **keys;
2.292 + gboolean sw;
2.293 + GError *err=NULL;
2.294 + config=read_config_file(&path);
2.295 + if (config)
2.296 + keys=g_key_file_get_keys(config,"options",NULL,NULL);
2.297 + else
2.298 + keys=NULL;
2.299 + if (keys)
2.300 + {
2.301 + for(i=0;keys[i];i++)
2.302 + {
2.303 + for(j=0;options[j].long_name;j++)
2.304 + {
2.305 + if (g_str_has_prefix(options[j].long_name,"no-"))
2.306 + continue;
2.307 + else if (!strcmp(keys[i],options[j].long_name))
2.308 + {
2.309 + if (options[j].arg==G_OPTION_ARG_NONE)
2.310 + {
2.311 + sw=g_key_file_get_boolean(config,"options",keys[i],
2.312 + &err);
2.313 + if (err)
2.314 + {
2.315 + g_printerr("Bookloupe: %s: options.%s: %s\n",
2.316 + path,keys[i],err->message);
2.317 + g_clear_error(&err);
2.318 + }
2.319 + else
2.320 + {
2.321 + if (options[j].flags&G_OPTION_FLAG_REVERSE)
2.322 + sw=!sw;
2.323 + *(gboolean *)options[j].arg_data=sw;
2.324 + }
2.325 + break;
2.326 + }
2.327 + else if (options[j].arg==G_OPTION_ARG_STRING)
2.328 + {
2.329 + s=g_key_file_get_string(config,"options",keys[i],
2.330 + &err);
2.331 + if (err)
2.332 + {
2.333 + g_printerr("Bookloupe: %s: options.%s: %s\n",
2.334 + path,keys[i],err->message);
2.335 + g_clear_error(&err);
2.336 + }
2.337 + else
2.338 + {
2.339 + g_free(*(gchar **)options[j].arg_data);
2.340 + if (!g_strcmp0(s,"auto"))
2.341 + {
2.342 + *(gchar **)options[j].arg_data=NULL;
2.343 + g_free(s);
2.344 + }
2.345 + else
2.346 + *(gchar **)options[j].arg_data=s;
2.347 + }
2.348 + break;
2.349 + }
2.350 + else
2.351 + g_assert_not_reached();
2.352 + }
2.353 + }
2.354 + if (!options[j].long_name)
2.355 + g_printerr("Bookloupe: %s: Unknown option \"%s\" ignored\n",
2.356 + path,keys[i]);
2.357 + }
2.358 + g_strfreev(keys);
2.359 + }
2.360 + if (config)
2.361 + g_free(path);
2.362 +}
2.363 +
2.364 void parse_options(int *argc,char ***argv)
2.365 {
2.366 GError *err=NULL;
2.367 GOptionContext *context;
2.368 + GOptionGroup *compatibility;
2.369 context=g_option_context_new(
2.370 - "file - looks for errors in Project Gutenberg(TM) etexts");
2.371 + "file - look for errors in Project Gutenberg(TM) etexts");
2.372 g_option_context_add_main_entries(context,options,NULL);
2.373 + g_option_context_add_main_entries(context,config_options,NULL);
2.374 + compatibility=g_option_group_new("compatibility",
2.375 + "Options for Compatibility with Gutcheck:",
2.376 + "Show compatibility options",NULL,NULL);
2.377 + g_option_group_add_entries(compatibility,compatibility_options);
2.378 + g_option_context_add_group(context,compatibility);
2.379 + g_option_context_set_description(context,
2.380 + "For simplicity, only the switch options which reverse the\n"
2.381 + "default configuration are listed. In most cases, both vanilla\n"
2.382 + "and \"no-\" prefixed versions are available for use.");
2.383 if (!g_option_context_parse(context,argc,argv,&err))
2.384 {
2.385 g_printerr("Bookloupe: %s\n",err->message);
2.386 g_printerr("Use \"%s --help\" for help\n",(*argv)[0]);
2.387 exit(1);
2.388 }
2.389 - /* Paranoid checking is turned OFF, not on, by its switch */
2.390 - pswit[PARANOID_SWITCH]=!pswit[PARANOID_SWITCH];
2.391 - if (pswit[PARANOID_SWITCH])
2.392 - /* if running in paranoid mode, typo checks default to enabled */
2.393 + if (typo_compat)
2.394 pswit[TYPO_SWITCH]=!pswit[TYPO_SWITCH];
2.395 - /* Line-end checking is turned OFF, not on, by its switch */
2.396 - pswit[LINE_END_SWITCH]=!pswit[LINE_END_SWITCH];
2.397 - /* Echoing is turned OFF, not on, by its switch */
2.398 - pswit[ECHO_SWITCH]=!pswit[ECHO_SWITCH];
2.399 - if (pswit[OVERVIEW_SWITCH])
2.400 - /* just print summary; don't echo */
2.401 - pswit[ECHO_SWITCH]=FALSE;
2.402 + if (paranoid_compat)
2.403 + {
2.404 + pswit[PARANOID_SWITCH]=!pswit[PARANOID_SWITCH];
2.405 + pswit[TYPO_SWITCH]=!pswit[TYPO_SWITCH];
2.406 + }
2.407 /*
2.408 * Web uploads - for the moment, this is really just a placeholder
2.409 * until we decide what processing we really want to do on web uploads
2.410 @@ -246,6 +558,21 @@
2.411 pswit[USERTYPO_SWITCH]=FALSE;
2.412 pswit[DP_SWITCH]=FALSE;
2.413 }
2.414 + if (opt_charset && !set_charset(opt_charset,&err))
2.415 + {
2.416 + g_printerr("%s\n",err->message);
2.417 + exit(1);
2.418 + }
2.419 + if (pswit[DUMP_CONFIG_SWITCH])
2.420 + {
2.421 + dump_config();
2.422 + exit(0);
2.423 + }
2.424 + g_free(opt_charset);
2.425 + opt_charset=NULL;
2.426 + if (pswit[OVERVIEW_SWITCH])
2.427 + /* just print summary; don't echo */
2.428 + pswit[ECHO_SWITCH]=FALSE;
2.429 if (*argc<2)
2.430 {
2.431 proghelp(context);
2.432 @@ -305,7 +632,11 @@
2.433 exit(1);
2.434 }
2.435 if (g_utf8_validate(contents,len,NULL))
2.436 + {
2.437 utf8=g_utf8_normalize(contents,len,G_NORMALIZE_DEFAULT_COMPOSE);
2.438 + if (!charset)
2.439 + (void)set_charset("UNICODE",NULL);
2.440 + }
2.441 else
2.442 utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
2.443 g_free(contents);
2.444 @@ -388,6 +719,15 @@
2.445 saved_cp=GetConsoleOutputCP();
2.446 #endif
2.447 running_from=g_path_get_dirname(argv[0]);
2.448 + /* Paranoid checking is turned OFF, not on, by its switch */
2.449 + pswit[PARANOID_SWITCH]=TRUE;
2.450 + /* if running in paranoid mode, typo checks default to enabled */
2.451 + pswit[TYPO_SWITCH]=TRUE;
2.452 + /* Line-end checking is turned OFF, not on, by its switch */
2.453 + pswit[LINE_END_SWITCH]=TRUE;
2.454 + /* Echoing is turned OFF, not on, by its switch */
2.455 + pswit[ECHO_SWITCH]=TRUE;
2.456 + parse_config_file();
2.457 parse_options(&argc,&argv);
2.458 if (pswit[USERTYPO_SWITCH])
2.459 read_user_scannos();
2.460 @@ -428,6 +768,9 @@
2.461 g_free(running_from);
2.462 if (usertypo)
2.463 g_tree_unref(usertypo);
2.464 + set_charset(NULL,NULL);
2.465 + if (config)
2.466 + g_key_file_free(config);
2.467 return 0;
2.468 }
2.469
2.470 @@ -708,25 +1051,32 @@
2.471 "Not reporting them.\n",
2.472 results->spacedash+results->non_PG_space_emdash);
2.473 }
2.474 - /* If more than a quarter of characters are hi-bit, bug out. */
2.475 - warnings.bin=1;
2.476 - if (results->binlen*4>results->totlen)
2.477 + if (charset)
2.478 + warnings.bin=0;
2.479 + else
2.480 {
2.481 - g_print(" --> This file does not appear to be ASCII. "
2.482 - "Terminating. Best of luck with it!\n");
2.483 - exit(1);
2.484 - }
2.485 - if (results->alphalen*4<results->totlen)
2.486 - {
2.487 - g_print(" --> This file does not appear to be text. "
2.488 - "Terminating. Best of luck with it!\n");
2.489 - exit(1);
2.490 - }
2.491 - if (results->binlen*100>results->totlen || results->binlen>100)
2.492 - {
2.493 - g_print(" --> There are a lot of foreign letters here. "
2.494 - "Not reporting them.\n");
2.495 - warnings.bin=0;
2.496 + /* Charset ISO_8859-1/ASCII checks for compatibility with gutcheck */
2.497 + warnings.bin=1;
2.498 + /* If more than a quarter of characters are hi-bit, bug out. */
2.499 + if (results->binlen*4>results->totlen)
2.500 + {
2.501 + g_print(" --> This file does not appear to be ASCII. "
2.502 + "Terminating. Best of luck with it!\n");
2.503 + exit(1);
2.504 + }
2.505 + if (results->alphalen*4<results->totlen)
2.506 + {
2.507 + g_print(" --> This file does not appear to be text. "
2.508 + "Terminating. Best of luck with it!\n");
2.509 + exit(1);
2.510 + }
2.511 + if (results->binlen*100>results->totlen || results->binlen>100)
2.512 + {
2.513 + g_print(" --> There are a lot of foreign letters here. "
2.514 + "Not reporting them.\n");
2.515 + if (!pswit[VERBOSE_SWITCH])
2.516 + warnings.bin=0;
2.517 + }
2.518 }
2.519 warnings.isDutch=FALSE;
2.520 if (results->Dutchcount>50)
2.521 @@ -754,7 +1104,6 @@
2.522 g_print("\n");
2.523 if (pswit[VERBOSE_SWITCH])
2.524 {
2.525 - warnings.bin=1;
2.526 warnings.shortline=1;
2.527 warnings.dotcomma=1;
2.528 warnings.longline=1;
2.529 @@ -949,14 +1298,17 @@
2.530 gboolean isemptyline)
2.531 {
2.532 /* Don't repeat multiple warnings on one line. */
2.533 - gboolean eNon_A=FALSE,eTab=FALSE,eTilde=FALSE;
2.534 + gboolean eInvalidChar=FALSE,eTab=FALSE,eTilde=FALSE;
2.535 gboolean eCarat=FALSE,eFSlash=FALSE,eAst=FALSE;
2.536 const char *s;
2.537 gunichar c;
2.538 + gsize nb;
2.539 + gchar *t;
2.540 for (s=aline;*s;s=g_utf8_next_char(s))
2.541 {
2.542 c=g_utf8_get_char(s);
2.543 - if (!eNon_A && (c<CHAR_SPACE && c!='\t' && c!='\n' || c>127))
2.544 + if (warnings->bin && !eInvalidChar &&
2.545 + (c<CHAR_SPACE && c!='\t' && c!='\n' || c>127))
2.546 {
2.547 if (pswit[ECHO_SWITCH])
2.548 g_print("\n%s\n",aline);
2.549 @@ -971,7 +1323,57 @@
2.550 linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
2.551 else
2.552 cnt_bin++;
2.553 - eNon_A=TRUE;
2.554 + eInvalidChar=TRUE;
2.555 + }
2.556 + if (!eInvalidChar && charset)
2.557 + {
2.558 + if (charset_validator==(GIConv)-1)
2.559 + {
2.560 + if (!g_unichar_isdefined(c))
2.561 + {
2.562 + if (pswit[ECHO_SWITCH])
2.563 + g_print("\n%s\n",aline);
2.564 + if (!pswit[OVERVIEW_SWITCH])
2.565 + g_print(" Line %ld column %ld - Unassigned UNICODE "
2.566 + "code point U+%04" G_GINT32_MODIFIER "X\n",
2.567 + linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
2.568 + else
2.569 + cnt_bin++;
2.570 + eInvalidChar=TRUE;
2.571 + }
2.572 + else if (c>=0xE000 && c<=0xF8FF || c>=0xF0000 && c<=0xFFFFD ||
2.573 + c>=0x100000 && c<=0x10FFFD)
2.574 + {
2.575 + if (pswit[ECHO_SWITCH])
2.576 + g_print("\n%s\n",aline);
2.577 + if (!pswit[OVERVIEW_SWITCH])
2.578 + g_print(" Line %ld column %ld - Private Use "
2.579 + "character U+%04" G_GINT32_MODIFIER "X\n",
2.580 + linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
2.581 + else
2.582 + cnt_bin++;
2.583 + eInvalidChar=TRUE;
2.584 + }
2.585 + }
2.586 + else
2.587 + {
2.588 + t=g_convert_with_iconv(s,g_utf8_next_char(s)-s,
2.589 + charset_validator,NULL,&nb,NULL);
2.590 + if (t)
2.591 + g_free(t);
2.592 + else
2.593 + {
2.594 + if (pswit[ECHO_SWITCH])
2.595 + g_print("\n%s\n",aline);
2.596 + if (!pswit[OVERVIEW_SWITCH])
2.597 + g_print(" Line %ld column %ld - Non-%s "
2.598 + "character %u\n",linecnt,
2.599 + g_utf8_pointer_to_offset(aline,s)+1,charset,c);
2.600 + else
2.601 + cnt_bin++;
2.602 + eInvalidChar=TRUE;
2.603 + }
2.604 + }
2.605 }
2.606 if (!eTab && c==CHAR_TAB)
2.607 {
2.608 @@ -2626,8 +3028,7 @@
2.609 if (s>=aline && g_utf8_get_char(s)=='-')
2.610 enddash=TRUE;
2.611 check_for_control_characters(aline);
2.612 - if (warnings->bin)
2.613 - check_for_odd_characters(aline,warnings,isemptyline);
2.614 + check_for_odd_characters(aline,warnings,isemptyline);
2.615 if (warnings->longline)
2.616 check_for_long_line(aline);
2.617 if (warnings->shortline)
3.1 --- a/bookloupe/bookloupe.h Thu Oct 03 22:59:44 2013 +0100
3.2 +++ b/bookloupe/bookloupe.h Thu Oct 03 23:00:49 2013 +0100
3.3 @@ -55,6 +55,7 @@
3.4 MARKUP_SWITCH,
3.5 USERTYPO_SWITCH,
3.6 DP_SWITCH,
3.7 + DUMP_CONFIG_SWITCH,
3.8 SWITNO
3.9 };
3.10
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
4.2 +++ b/sample.ini Thu Oct 03 23:00:49 2013 +0100
4.3 @@ -0,0 +1,33 @@
4.4 +# Default configuration for bookloupe
4.5 +
4.6 +[options]
4.7 +# Ignore DP-specific markup
4.8 +dp=false
4.9 +# Echo queried line
4.10 +echo=true
4.11 +# Check single quotes
4.12 +squote=false
4.13 +# Check common typos
4.14 +typo=true
4.15 +# Require closure of quotes on every paragraph
4.16 +qpara=false
4.17 +# Enable paranoid querying of everything
4.18 +paranoid=true
4.19 +# Enable line end checking
4.20 +line-end=true
4.21 +# Overview: just show counts
4.22 +overview=false
4.23 +# Output errors to stdout instead of stderr
4.24 +stdout=false
4.25 +# Echo header fields
4.26 +header=false
4.27 +# Ignore markup in < >
4.28 +markup=false
4.29 +# Use file of user-defined typos
4.30 +usertypo=false
4.31 +# Defaults for use on www upload
4.32 +web=false
4.33 +# Verbose - list everything
4.34 +verbose=false
4.35 +# Set of characters valid for this ebook
4.36 +charset=auto
5.1 --- a/test/bookloupe/Makefile.am Thu Oct 03 22:59:44 2013 +0100
5.2 +++ b/test/bookloupe/Makefile.am Thu Oct 03 23:00:49 2013 +0100
5.3 @@ -1,5 +1,7 @@
5.4 TESTS_ENVIRONMENT=BOOKLOUPE=../../bookloupe/bookloupe ../harness/loupe-test
5.5 TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst curved-quotes.tst \
5.6 - runfox-quotes.tst curved-genitives.tst multi-line-illustration.tst
5.7 + runfox-quotes.tst curved-genitives.tst multi-line-illustration.tst \
5.8 + config-internal.tst config-default.tst config-user.tst \
5.9 + config-override.tst charset-cp1252.tst charset-latin1.tst
5.10
5.11 dist_pkgdata_DATA=$(TESTS)
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
6.2 +++ b/test/bookloupe/charset-cp1252.tst Thu Oct 03 23:00:49 2013 +0100
6.3 @@ -0,0 +1,16 @@
6.4 +**************** OPTIONS ****************
6.5 +--charset=WINDOWS-1252
6.6 +**************** ENCODING ****************
6.7 +WINDOWS-1252
6.8 +**************** INPUT ****************
6.9 +Unless binary mode is engaged, gutcheck will warn about a number of
6.10 +characters defined in Windows-1252. Bookloupe provides support for
6.11 +disabling such checks without concern as to the file size and how
6.12 +many characters with the eighth bit set it may contain by allowing a
6.13 +character set to be declared. With the character set declared as
6.14 +WINDOWS-1252, all characters defined in Windows-1252 should be acceptable
6.15 +and no warnings should be issued.
6.16 +
6.17 +We test for this by including just one such character—the em dash.
6.18 +
6.19 +**************** EXPECTED ****************
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
7.2 +++ b/test/bookloupe/charset-latin1.tst Thu Oct 03 23:00:49 2013 +0100
7.3 @@ -0,0 +1,58 @@
7.4 +**************** OPTIONS ****************
7.5 +--charset=ISO-8859-1
7.6 +**************** ENCODING ****************
7.7 +WINDOWS-1252
7.8 +**************** INPUT ****************
7.9 +Where the character set declared is narrower than the character set
7.10 +implied by the encoding as in this case (Windows-1252 is a superset
7.11 +of the first latin alphabet defined in ECMA 94), then bookloupe should
7.12 +warn about characters that are not in the declared character set but
7.13 +should still recognise them and otherwise handle them as it would
7.14 +normally do. We use the curved apostrophe as a test for this since
7.15 +if bookloupe didn't recognise it then it would query the orphaned
7.16 +letters from the genitives and abbreviations.
7.17 +
7.18 +John Hendricks was bear-leading at the time. He had originally studied
7.19 +for Holy Orders, but had abandoned the Church later for private reasons
7.20 +connected with his faith, and had taken to teaching and tutoring
7.21 +instead. He was an honest, upstanding fellow of five-and-thirty,
7.22 +incorruptible, intelligent in a simple, straightforward way. He played
7.23 +games with his head, more than most Englishmen do, but he went through
7.24 +life without much calculation. He had qualities that made boys like
7.25 +and respect him; he won their confidence. Poor, proud, ambitious,
7.26 +he realised that fate offered him a chance when the Secretary of
7.27 +State for Scotland asked him if he would give up his other pupils
7.28 +for a year and take his son, Lord Ernie, round the world upon an
7.29 +educational trip that might make a man of him. For Lord Ernie was the
7.30 +only son, and the Marquess’s influence was naturally great. To have
7.31 +deposited a regenerated Lord Ernie at the castle gates might have
7.32 +guaranteed Hendricks’ future. After leaving Eton prematurely the lad
7.33 +had come under Hendricks’ charge for a time, and with such excellent
7.34 +results--‘I’d simply swear by that chap, you know,’ the boy used
7.35 +to say--that his father, considerably impressed, and rather as a
7.36 +last resort, had made this proposition. And Hendricks, without much
7.37 +calculation, had accepted it. He liked ‘Bindy’ for himself. It was
7.38 +in his heart to ‘make a man of him,’ if possible. They had now been
7.39 +round the world together and had come up from Brindisi to the Italian
7.40 +Lakes, and so into Switzerland. It was middle October. With a week or
7.41 +two to spare they were making leisurely for the ancestral halls in
7.42 +Aberdeenshire.
7.43 +**************** EXPECTED ****************
7.44 +
7.45 +only son, and the Marquess’s influence was naturally great. To have
7.46 + Line 22 column 27 - Non-ISO-8859-1 character 8217
7.47 +
7.48 +guaranteed Hendricks’ future. After leaving Eton prematurely the lad
7.49 + Line 24 column 21 - Non-ISO-8859-1 character 8217
7.50 +
7.51 +had come under Hendricks’ charge for a time, and with such excellent
7.52 + Line 25 column 25 - Non-ISO-8859-1 character 8217
7.53 +
7.54 +results--‘I’d simply swear by that chap, you know,’ the boy used
7.55 + Line 26 column 10 - Non-ISO-8859-1 character 8216
7.56 +
7.57 +calculation, had accepted it. He liked ‘Bindy’ for himself. It was
7.58 + Line 29 column 40 - Non-ISO-8859-1 character 8216
7.59 +
7.60 +in his heart to ‘make a man of him,’ if possible. They had now been
7.61 + Line 30 column 17 - Non-ISO-8859-1 character 8216
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
8.2 +++ b/test/bookloupe/config-default.tst Thu Oct 03 23:00:49 2013 +0100
8.3 @@ -0,0 +1,66 @@
8.4 +**************** OPTIONS ****************
8.5 +--dump-config
8.6 +**************** INPUT(bookloupe.ini) ****************
8.7 +# Default configuration for bookloupe
8.8 +
8.9 +[options]
8.10 +# Ignore DP-specific markup
8.11 +dp=false
8.12 +# Echo queried line
8.13 +echo=true
8.14 +# Check single quotes
8.15 +squote=false
8.16 +# Check common typos
8.17 +typo=true
8.18 +# Require closure of quotes on every paragraph
8.19 +qpara=false
8.20 +# Enable paranoid querying of everything
8.21 +paranoid=true
8.22 +# Enable line end checking
8.23 +line-end=true
8.24 +# Overview: just show counts
8.25 +overview=false
8.26 +# Output errors to stdout instead of stderr
8.27 +stdout=false
8.28 +# Echo header fields
8.29 +header=false
8.30 +# Ignore markup in < >
8.31 +markup=false
8.32 +# Use file of user-defined typos
8.33 +usertypo=false
8.34 +# Verbose - list everything
8.35 +verbose=false
8.36 +# Set of characters valid for this ebook
8.37 +charset=auto
8.38 +**************** EXPECTED(stdout) ****************
8.39 +# Default configuration for bookloupe
8.40 +
8.41 +[options]
8.42 +# Ignore DP-specific markup
8.43 +dp=false
8.44 +# Echo queried line
8.45 +echo=true
8.46 +# Check single quotes
8.47 +squote=false
8.48 +# Check common typos
8.49 +typo=true
8.50 +# Require closure of quotes on every paragraph
8.51 +qpara=false
8.52 +# Enable paranoid querying of everything
8.53 +paranoid=true
8.54 +# Enable line end checking
8.55 +line-end=true
8.56 +# Overview: just show counts
8.57 +overview=false
8.58 +# Output errors to stdout instead of stderr
8.59 +stdout=false
8.60 +# Echo header fields
8.61 +header=false
8.62 +# Ignore markup in < >
8.63 +markup=false
8.64 +# Use file of user-defined typos
8.65 +usertypo=false
8.66 +# Verbose - list everything
8.67 +verbose=false
8.68 +# Set of characters valid for this ebook
8.69 +charset=auto
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
9.2 +++ b/test/bookloupe/config-internal.tst Thu Oct 03 23:00:49 2013 +0100
9.3 @@ -0,0 +1,34 @@
9.4 +**************** OPTIONS ****************
9.5 +--dump-config
9.6 +**************** EXPECTED(stdout) ****************
9.7 +# Default configuration for bookloupe
9.8 +
9.9 +[options]
9.10 +# Ignore DP-specific markup
9.11 +dp=false
9.12 +# Echo queried line
9.13 +echo=true
9.14 +# Check single quotes
9.15 +squote=false
9.16 +# Check common typos
9.17 +typo=true
9.18 +# Require closure of quotes on every paragraph
9.19 +qpara=false
9.20 +# Enable paranoid querying of everything
9.21 +paranoid=true
9.22 +# Enable line end checking
9.23 +line-end=true
9.24 +# Overview: just show counts
9.25 +overview=false
9.26 +# Output errors to stdout instead of stderr
9.27 +stdout=false
9.28 +# Echo header fields
9.29 +header=false
9.30 +# Ignore markup in < >
9.31 +markup=false
9.32 +# Use file of user-defined typos
9.33 +usertypo=false
9.34 +# Verbose - list everything
9.35 +verbose=false
9.36 +# Set of characters valid for this ebook
9.37 +charset=auto
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
10.2 +++ b/test/bookloupe/config-override.tst Thu Oct 03 23:00:49 2013 +0100
10.3 @@ -0,0 +1,68 @@
10.4 +**************** OPTIONS ****************
10.5 +--usertypo
10.6 +--charset=auto
10.7 +--dump-config
10.8 +**************** INPUT(bookloupe.ini) ****************
10.9 +# Relaxed configuration for bookloupe
10.10 +
10.11 +[options]
10.12 +# Ignore DP-specific markup
10.13 +dp=false
10.14 +# Echo queried line
10.15 +echo=true
10.16 +# Check single quotes
10.17 +squote=false
10.18 +# Check common typos
10.19 +typo=true
10.20 +# Require closure of quotes on every paragraph
10.21 +qpara=false
10.22 +# Enable paranoid querying of everything
10.23 +paranoid=false
10.24 +# Enable line end checking
10.25 +line-end=true
10.26 +# Overview: just show counts
10.27 +overview=false
10.28 +# Output errors to stdout instead of stderr
10.29 +stdout=false
10.30 +# Echo header fields
10.31 +header=false
10.32 +# Ignore markup in < >
10.33 +markup=false
10.34 +# Use file of user-defined typos
10.35 +usertypo=false
10.36 +# Verbose - list everything
10.37 +verbose=false
10.38 +# Set of characters valid for this ebook
10.39 +charset=UNICODE
10.40 +**************** EXPECTED(stdout) ****************
10.41 +# Relaxed configuration for bookloupe
10.42 +
10.43 +[options]
10.44 +# Ignore DP-specific markup
10.45 +dp=false
10.46 +# Echo queried line
10.47 +echo=true
10.48 +# Check single quotes
10.49 +squote=false
10.50 +# Check common typos
10.51 +typo=true
10.52 +# Require closure of quotes on every paragraph
10.53 +qpara=false
10.54 +# Enable paranoid querying of everything
10.55 +paranoid=false
10.56 +# Enable line end checking
10.57 +line-end=true
10.58 +# Overview: just show counts
10.59 +overview=false
10.60 +# Output errors to stdout instead of stderr
10.61 +stdout=false
10.62 +# Echo header fields
10.63 +header=false
10.64 +# Ignore markup in < >
10.65 +markup=false
10.66 +# Use file of user-defined typos
10.67 +usertypo=true
10.68 +# Verbose - list everything
10.69 +verbose=false
10.70 +# Set of characters valid for this ebook
10.71 +charset=auto
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
11.2 +++ b/test/bookloupe/config-user.tst Thu Oct 03 23:00:49 2013 +0100
11.3 @@ -0,0 +1,76 @@
11.4 +**************** OPTIONS ****************
11.5 +--dump-config
11.6 +**************** INPUT(bookloupe.ini) ****************
11.7 +# Mary Contrary's configuration for bookloupe
11.8 +
11.9 +# Bookloupe will ignore this group, but it's nice to have.
11.10 +[other]
11.11 +# Look at me!
11.12 +name="Mary Contrary"
11.13 +
11.14 +[options]
11.15 +# Ignore DP-specific markup - sounds useful
11.16 +dp=true
11.17 +# Echo queried line - what's the point of that?
11.18 +echo=false
11.19 +# Check single quotes - yup
11.20 +squote=true
11.21 +# Check common typos - waste of time
11.22 +typo=false
11.23 +# Require closure of quotes on every paragraph - okay
11.24 +qpara=true
11.25 +# Enable paranoid querying of everything - Huh?
11.26 +paranoid=false
11.27 +# Enable line end checking - pointless
11.28 +line-end=false
11.29 +# Overview: just show counts - Brief is good
11.30 +overview=true
11.31 +# Output errors to stdout instead of stderr - keeps things together
11.32 +stdout=true
11.33 +# Echo header fields - I'd rather see it
11.34 +header=true
11.35 +# Ignore markup in < > - Need this
11.36 +markup=true
11.37 +# Use file of user-defined typos - And this
11.38 +usertypo=true
11.39 +# Verbose - list everything - Contrary by name...
11.40 +verbose=true
11.41 +# Set of characters valid for this ebook - Let's stick with Latin1
11.42 +charset=ISO-8859-1
11.43 +**************** EXPECTED(stdout) ****************
11.44 +# Mary Contrary's configuration for bookloupe
11.45 +
11.46 +# Bookloupe will ignore this group, but it's nice to have.
11.47 +[other]
11.48 +# Look at me!
11.49 +name="Mary Contrary"
11.50 +
11.51 +[options]
11.52 +# Ignore DP-specific markup - sounds useful
11.53 +dp=true
11.54 +# Echo queried line - what's the point of that?
11.55 +echo=false
11.56 +# Check single quotes - yup
11.57 +squote=true
11.58 +# Check common typos - waste of time
11.59 +typo=false
11.60 +# Require closure of quotes on every paragraph - okay
11.61 +qpara=true
11.62 +# Enable paranoid querying of everything - Huh?
11.63 +paranoid=false
11.64 +# Enable line end checking - pointless
11.65 +line-end=false
11.66 +# Overview: just show counts - Brief is good
11.67 +overview=true
11.68 +# Output errors to stdout instead of stderr - keeps things together
11.69 +stdout=true
11.70 +# Echo header fields - I'd rather see it
11.71 +header=true
11.72 +# Ignore markup in < > - Need this
11.73 +markup=true
11.74 +# Use file of user-defined typos - And this
11.75 +usertypo=true
11.76 +# Verbose - list everything - Contrary by name...
11.77 +verbose=true
11.78 +# Set of characters valid for this ebook - Let's stick with Latin1
11.79 +charset=ISO-8859-1
12.1 --- a/test/compatibility/Makefile.am Thu Oct 03 22:59:44 2013 +0100
12.2 +++ b/test/compatibility/Makefile.am Thu Oct 03 23:00:49 2013 +0100
12.3 @@ -6,6 +6,7 @@
12.4 user-defined-typo.tst brackets.tst single-quotes.tst grave-quotes.tst \
12.5 dashes.tst control-characters.tst unusual-characters.tst \
12.6 windows-1252.tst periods.tst long-line.tst unmarked-paragraph.tst \
12.7 + paranoid.tst paranoid-typos.tst no-paranoid.tst no-paranoid-typos.tst \
12.8 hebe-jeebies.tst mail-from.tst scannos.tst before-comma.tst \
12.9 before-period.tst double-punctuation.tst genitives.tst embedded-cr.tst \
12.10 continuing-quotes.tst
13.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
13.2 +++ b/test/compatibility/no-paranoid-typos.tst Thu Oct 03 23:00:49 2013 +0100
13.3 @@ -0,0 +1,12 @@
13.4 +**************** OPTIONS ****************
13.5 +-x
13.6 +-t
13.7 +**************** INPUT ****************
13.8 +In paranoid mode we check for a standalone digits. 1 think this is a useful
13.9 +feature. When checking for typos every, strangly placed comma is reported.
13.10 +
13.11 +If paranoid mode is switched off, we can still check for typos.
13.12 +**************** EXPECTED ****************
13.13 +
13.14 +feature. When checking for typos every, strangly placed comma is reported.
13.15 + Line 2 column 39 - Query punctuation after every?
14.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
14.2 +++ b/test/compatibility/no-paranoid.tst Thu Oct 03 23:00:49 2013 +0100
14.3 @@ -0,0 +1,8 @@
14.4 +**************** OPTIONS ****************
14.5 +-x
14.6 +**************** INPUT ****************
14.7 +In paranoid mode we check for a standalone digits. 1 think this is a useful
14.8 +feature. When checking for typos every, strangly placed comma is reported.
14.9 +
14.10 +If paranoid mode is switched off, checking for typos defaults to off too.
14.11 +**************** EXPECTED ****************
15.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
15.2 +++ b/test/compatibility/paranoid-typos.tst Thu Oct 03 23:00:49 2013 +0100
15.3 @@ -0,0 +1,12 @@
15.4 +**************** OPTIONS ****************
15.5 +-t
15.6 +**************** INPUT ****************
15.7 +In paranoid mode we check for a standalone digits. 1 think this is a useful
15.8 +feature. When checking for typos every, strangly placed comma is reported.
15.9 +
15.10 +In paranoid mode (the default), typo checking is switched off with its
15.11 +short option.
15.12 +**************** EXPECTED ****************
15.13 +
15.14 +In paranoid mode we check for a standalone digits. 1 think this is a useful
15.15 + Line 1 column 51 - Query standalone 1
16.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
16.2 +++ b/test/compatibility/paranoid.tst Thu Oct 03 23:00:49 2013 +0100
16.3 @@ -0,0 +1,12 @@
16.4 +**************** INPUT ****************
16.5 +In paranoid mode we check for a standalone digits. 1 think this is a useful
16.6 +feature. When checking for typos every, strangly placed comma is reported.
16.7 +
16.8 +By default, both paranoid mode and checking for typos should be on.
16.9 +**************** EXPECTED ****************
16.10 +
16.11 +In paranoid mode we check for a standalone digits. 1 think this is a useful
16.12 + Line 1 column 51 - Query standalone 1
16.13 +
16.14 +feature. When checking for typos every, strangly placed comma is reported.
16.15 + Line 2 column 39 - Query punctuation after every?
17.1 --- a/test/harness/Makefile.am Thu Oct 03 22:59:44 2013 +0100
17.2 +++ b/test/harness/Makefile.am Thu Oct 03 23:00:49 2013 +0100
17.3 @@ -5,5 +5,6 @@
17.4
17.5 loupe_test_SOURCES=loupe-test.c testcase.c testcase.h testcaseio.c \
17.6 testcaseio.h testcaseparser.c testcaseparser.h testcaseinput.c \
17.7 - testcaseinput.h warningsparser.c warningsparser.h
17.8 + testcaseinput.h testcaseoutput.c testcaseoutput.h warningsparser.c \
17.9 + warningsparser.h
17.10 loupe_test_LDADD=../../bl/libbl.la
18.1 --- a/test/harness/loupe-test.c Thu Oct 03 22:59:44 2013 +0100
18.2 +++ b/test/harness/loupe-test.c Thu Oct 03 23:00:49 2013 +0100
18.3 @@ -48,6 +48,7 @@
18.4 exit(1);
18.5 }
18.6 bl_set_print_handlers();
18.7 + g_setenv("BOOKLOUPE_CONFIG_PATH",".",TRUE);
18.8 for(i=1;i<argc;i++)
18.9 pass&=run_test(argv[i]);
18.10 return pass?0:1;
19.1 --- a/test/harness/testcase.c Thu Oct 03 22:59:44 2013 +0100
19.2 +++ b/test/harness/testcase.c Thu Oct 03 23:00:49 2013 +0100
19.3 @@ -7,6 +7,7 @@
19.4 #include <bl/bl.h>
19.5 #include "testcase.h"
19.6 #include "testcaseinput.h"
19.7 +#include "testcaseoutput.h"
19.8
19.9 GQuark testcase_error_quark(void)
19.10 {
19.11 @@ -171,6 +172,64 @@
19.12 return g_string_free(filename,FALSE);
19.13 }
19.14
19.15 +/*
19.16 + * Verify that all the output files specified by a testcase are present
19.17 + * with the expected contents.
19.18 + */
19.19 +gboolean testcase_verify_output_files(Testcase *testcase)
19.20 +{
19.21 + GSList *link;
19.22 + GError *tmp_err=NULL;
19.23 + gboolean retval=TRUE;
19.24 + ssize_t offset;
19.25 + gchar *contents;
19.26 + TestcaseOutput *output;
19.27 + for(link=testcase->outputs;link;link=link->next)
19.28 + {
19.29 + output=link->data;
19.30 + if (!testcase_output_read(testcase,output,&contents,NULL,&tmp_err))
19.31 + {
19.32 + g_print("%s: FAIL\n",testcase->basename);
19.33 + g_print("%s\n",tmp_err->message);
19.34 + g_clear_error(&tmp_err);
19.35 + retval=FALSE;
19.36 + break;
19.37 + }
19.38 + else
19.39 + {
19.40 + if (strcmp(contents,output->contents))
19.41 + {
19.42 + g_print("%s: FAIL\n",testcase->basename);
19.43 + offset=common_prefix_length(contents,output->contents);
19.44 + if (!offset && !contents[offset])
19.45 + g_print("%s: Unexpected empty output from bookloupe.\n",
19.46 + output->name);
19.47 + else
19.48 + {
19.49 + g_print("%s: Unexpected output from bookloupe:\n",
19.50 + output->name);
19.51 + print_unexpected(contents,offset);
19.52 + }
19.53 + retval=FALSE;
19.54 + }
19.55 + g_free(contents);
19.56 + break;
19.57 + }
19.58 + }
19.59 + for(link=testcase->outputs;link;link=link->next)
19.60 + if (!testcase_output_remove(testcase,link->data,&tmp_err))
19.61 + {
19.62 + if (retval)
19.63 + {
19.64 + g_print("%s: FAIL\n",testcase->basename);
19.65 + g_print("%s\n",tmp_err->message);
19.66 + retval=TRUE;
19.67 + }
19.68 + g_clear_error(&tmp_err);
19.69 + }
19.70 + return retval;
19.71 +}
19.72 +
19.73 gboolean testcase_spawn_bookloupe(Testcase *testcase,char **standard_output,
19.74 GError **error)
19.75 {
19.76 @@ -460,7 +519,7 @@
19.77 gboolean r;
19.78 size_t pos,offset;
19.79 GString *header;
19.80 - char *output,*filename,*s,*xfail=NULL;
19.81 + char *filename,*s,*xfail=NULL;
19.82 GError *error=NULL;
19.83 if (!testcase_create_input_files(testcase,&error))
19.84 {
19.85 @@ -469,7 +528,7 @@
19.86 g_error_free(error);
19.87 return FALSE;
19.88 }
19.89 - r=testcase_spawn_bookloupe(testcase,&output,&error);
19.90 + r=testcase_spawn_bookloupe(testcase,&testcase->test_output,&error);
19.91 if (!r)
19.92 {
19.93 g_print("%s: FAIL\n",testcase->basename);
19.94 @@ -486,35 +545,40 @@
19.95 g_error_free(error);
19.96 return FALSE;
19.97 }
19.98 - header=g_string_new("\n\nFile: ");
19.99 - g_string_append(header,filename);
19.100 - g_string_append(header,"\n");
19.101 - if (!g_str_has_prefix(output,header->str))
19.102 + if (testcase->expected || testcase->warnings)
19.103 {
19.104 - g_print("%s: FAIL\n",testcase->basename);
19.105 - g_print("Unexpected header from bookloupe:\n");
19.106 - offset=common_prefix_length(output,header->str);
19.107 - print_unexpected(output,offset);
19.108 - r=FALSE;
19.109 - }
19.110 - pos=header->len;
19.111 - if (r)
19.112 - {
19.113 - /* Skip the summary */
19.114 - s=strstr(output+pos,"\n\n");
19.115 - if (s)
19.116 - pos=s-output+2;
19.117 - else
19.118 + header=g_string_new("\n\nFile: ");
19.119 + g_string_append(header,filename);
19.120 + g_string_append(header,"\n");
19.121 + if (!g_str_has_prefix(testcase->test_output,header->str))
19.122 {
19.123 g_print("%s: FAIL\n",testcase->basename);
19.124 - g_print("Unterminated summary from bookloupe:\n%s\n",output+pos);
19.125 + g_print("Unexpected header from bookloupe:\n");
19.126 + offset=common_prefix_length(testcase->test_output,header->str);
19.127 + print_unexpected(testcase->test_output,offset);
19.128 r=FALSE;
19.129 }
19.130 + pos=header->len;
19.131 + if (r)
19.132 + {
19.133 + /* Skip the summary */
19.134 + s=strstr(testcase->test_output+pos,"\n\n");
19.135 + if (s)
19.136 + pos=s-testcase->test_output+2;
19.137 + else
19.138 + {
19.139 + g_print("%s: FAIL\n",testcase->basename);
19.140 + g_print("Unterminated summary from bookloupe:\n%s\n",
19.141 + testcase->test_output+pos);
19.142 + r=FALSE;
19.143 + }
19.144 + }
19.145 + g_string_free(header,TRUE);
19.146 + r=testcase_check_warnings(testcase,testcase->test_output+pos,&xfail);
19.147 }
19.148 - g_string_free(header,TRUE);
19.149 - r=testcase_check_warnings(testcase,output+pos,&xfail);
19.150 + if (!testcase_verify_output_files(testcase))
19.151 + r=FALSE;
19.152 g_free(filename);
19.153 - g_free(output);
19.154 if (r)
19.155 {
19.156 if (xfail)
19.157 @@ -575,5 +639,6 @@
19.158 g_slist_free(testcase->warnings);
19.159 g_free(testcase->encoding);
19.160 g_strfreev(testcase->options);
19.161 + g_free(testcase->test_output);
19.162 g_free(testcase);
19.163 }
20.1 --- a/test/harness/testcase.h Thu Oct 03 22:59:44 2013 +0100
20.2 +++ b/test/harness/testcase.h Thu Oct 03 23:00:49 2013 +0100
20.3 @@ -37,10 +37,12 @@
20.4 char *basename;
20.5 char *tmpdir;
20.6 GSList *inputs;
20.7 + GSList *outputs;
20.8 char *expected;
20.9 GSList *warnings;
20.10 char *encoding; /* The character encoding to talk to BOOKLOUPE in */
20.11 char **options;
20.12 + char *test_output;
20.13 enum {
20.14 TESTCASE_XFAIL=1<<0,
20.15 TESTCASE_TMP_DIR=1<<1,
21.1 --- a/test/harness/testcaseio.c Thu Oct 03 22:59:44 2013 +0100
21.2 +++ b/test/harness/testcaseio.c Thu Oct 03 23:00:49 2013 +0100
21.3 @@ -5,6 +5,7 @@
21.4 #include <bl/bl.h>
21.5 #include "testcaseparser.h"
21.6 #include "testcaseinput.h"
21.7 +#include "testcaseoutput.h"
21.8 #include "testcaseio.h"
21.9 #include "warningsparser.h"
21.10
21.11 @@ -70,6 +71,25 @@
21.12 else if (!testcase->expected && !testcase->warnings &&
21.13 !strcmp(tag,"EXPECTED"))
21.14 testcase->expected=g_strdup(text);
21.15 + else if (g_str_has_prefix(tag,"EXPECTED(") && tag[strlen(tag)-1]==')')
21.16 + {
21.17 + arg=g_strndup(tag+9,strlen(tag)-10);
21.18 + s=g_path_get_dirname(arg);
21.19 + if (strcmp(s,"."))
21.20 + {
21.21 + g_printerr("%s: Expected files may not have a "
21.22 + "directory component\n",arg);
21.23 + g_free(s);
21.24 + g_free(arg);
21.25 + testcase_free(testcase);
21.26 + testcase_parser_free(parser);
21.27 + return NULL;
21.28 + }
21.29 + g_free(s);
21.30 + testcase->outputs=g_slist_prepend(testcase->outputs,
21.31 + testcase_output_new(arg,text));
21.32 + g_free(arg);
21.33 + }
21.34 else if (!testcase->expected && !testcase->warnings &&
21.35 !strcmp(tag,"WARNINGS"))
21.36 {
21.37 @@ -88,11 +108,14 @@
21.38 }
21.39 else if (!testcase->encoding && !strcmp(tag,"ENCODING"))
21.40 testcase->encoding=g_strchomp(g_strdup(text));
21.41 - else if (!testcase->encoding && !strcmp(tag,"OPTIONS"))
21.42 + else if (!testcase->options && !strcmp(tag,"OPTIONS"))
21.43 {
21.44 testcase->options=g_strsplit(text,"\n",0);
21.45 - g_free(testcase->options[g_strv_length(testcase->options)-1]);
21.46 - testcase->options[g_strv_length(testcase->options)-1]=NULL;
21.47 + if (testcase->options && g_strv_length(testcase->options)>0)
21.48 + {
21.49 + g_free(testcase->options[g_strv_length(testcase->options)-1]);
21.50 + testcase->options[g_strv_length(testcase->options)-1]=NULL;
21.51 + }
21.52 }
21.53 else
21.54 {
22.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
22.2 +++ b/test/harness/testcaseoutput.c Thu Oct 03 23:00:49 2013 +0100
22.3 @@ -0,0 +1,140 @@
22.4 +#include <stdlib.h>
22.5 +#include <string.h>
22.6 +#include <errno.h>
22.7 +#include <glib.h>
22.8 +#include <bl/bl.h>
22.9 +#include "testcase.h"
22.10 +#include "testcaseoutput.h"
22.11 +
22.12 +/*
22.13 + * Replace \r\n with \n, \n with U+240A (visible symbol for LF)
22.14 + * and \r with U+240D (visible symbol for CR).
22.15 + */
22.16 +static char *dos2unix(const char *text)
22.17 +{
22.18 + gunichar c;
22.19 + gboolean cr=FALSE;
22.20 + const gunichar visible_lf=0x240A;
22.21 + const gunichar visible_cr=0x240D;
22.22 + GString *string;
22.23 + string=g_string_new(NULL);
22.24 + while(*text)
22.25 + {
22.26 + c=g_utf8_get_char(text);
22.27 + text=g_utf8_next_char(text);
22.28 + if (cr)
22.29 + {
22.30 + cr=FALSE;
22.31 + if (c=='\n')
22.32 + {
22.33 + g_string_append_c(string,'\n');
22.34 + continue;
22.35 + }
22.36 + else
22.37 + g_string_append_unichar(string,visible_cr);
22.38 + }
22.39 + if (c=='\r')
22.40 + cr=TRUE;
22.41 + else if (c=='\n')
22.42 + g_string_append_unichar(string,visible_lf);
22.43 + else
22.44 + g_string_append_unichar(string,c);
22.45 + }
22.46 + if (cr)
22.47 + g_string_append_unichar(string,visible_cr);
22.48 + return g_string_free(string,FALSE);
22.49 +}
22.50 +
22.51 +/*
22.52 + * Read an output file needed for a testcase (as specified in <output>).
22.53 + * The file is read in the encoding specified for communicating with
22.54 + * bookloupe.
22.55 + */
22.56 +gboolean testcase_output_read(Testcase *testcase,TestcaseOutput *output,
22.57 + gchar **contents,gsize *length,GError **error)
22.58 +{
22.59 + char *filename,*s,*t;
22.60 + gboolean retval;
22.61 + GError *tmp_err=NULL;
22.62 + if (!strcmp(output->name,"stdout"))
22.63 + {
22.64 + *contents=g_strdup(testcase->test_output);
22.65 + if (length)
22.66 + *length=strlen(testcase->test_output);
22.67 + }
22.68 + else
22.69 + {
22.70 + if (testcase->tmpdir)
22.71 + filename=g_build_filename(testcase->tmpdir,output->name,NULL);
22.72 + else
22.73 + filename=g_strdup(output->name);
22.74 + if (!g_file_get_contents(filename,&s,NULL,error))
22.75 + {
22.76 + g_free(filename);
22.77 + return FALSE;
22.78 + }
22.79 + g_free(filename);
22.80 + if (testcase->encoding)
22.81 + {
22.82 + t=dos2unix(s);
22.83 + g_free(s);
22.84 + s=g_convert(t,-1,"UTF-8",testcase->encoding,NULL,length,&tmp_err);
22.85 + g_free(t);
22.86 + if (!s)
22.87 + {
22.88 + g_propagate_prefixed_error(error,tmp_err,
22.89 + "Conversion from %s failed: ",testcase->encoding);
22.90 + return FALSE;
22.91 + }
22.92 + *contents=s;
22.93 + }
22.94 + else
22.95 + {
22.96 + *contents=dos2unix(s);
22.97 + if (length)
22.98 + *length=strlen(*contents);
22.99 + }
22.100 + }
22.101 + return TRUE;
22.102 +}
22.103 +
22.104 +/*
22.105 + * Remove an output file created by program under test.
22.106 + */
22.107 +gboolean testcase_output_remove(Testcase *testcase,TestcaseOutput *output,
22.108 + GError **error)
22.109 +{
22.110 + char *filename;
22.111 + if (!strcmp(output->name,"stdout"))
22.112 + return TRUE;
22.113 + if (testcase->tmpdir)
22.114 + filename=g_build_filename(testcase->tmpdir,output->name,NULL);
22.115 + else
22.116 + filename=g_strdup(output->name);
22.117 + if (g_unlink(filename)<0)
22.118 + {
22.119 + g_set_error(error,G_FILE_ERROR,g_file_error_from_errno(errno),
22.120 + "%s: %s",filename,g_strerror(errno));
22.121 + return FALSE;
22.122 + }
22.123 + g_free(filename);
22.124 + return TRUE;
22.125 +}
22.126 +
22.127 +/* Create a new description of an output file expected by a testcase */
22.128 +TestcaseOutput *testcase_output_new(const char *name,const char *contents)
22.129 +{
22.130 + TestcaseOutput *output;
22.131 + output=g_new0(TestcaseOutput,1);
22.132 + output->name=g_strdup(name);
22.133 + output->contents=g_strdup(contents);
22.134 + return output;
22.135 +}
22.136 +
22.137 +/* Free the description of a testcase output file */
22.138 +void testcase_output_free(TestcaseOutput *output)
22.139 +{
22.140 + g_free(output->name);
22.141 + g_free(output->contents);
22.142 + g_free(output);
22.143 +}
23.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
23.2 +++ b/test/harness/testcaseoutput.h Thu Oct 03 23:00:49 2013 +0100
23.3 @@ -0,0 +1,19 @@
23.4 +#ifndef TESTCASE_OUTPUT_H
23.5 +#define TESTCASE_OUTPUT_H
23.6 +
23.7 +#include <glib.h>
23.8 +#include "testcase.h"
23.9 +
/*
 * Description of one output that a testcase expects bookloupe to produce.
 */
typedef struct TestcaseOutput {
    char *name;		/* File name, or "stdout" for bookloupe's output */
    char *contents;	/* The contents the testcase expects to find */
} TestcaseOutput;
23.14 +
23.15 +gboolean testcase_output_read(Testcase *testcase,TestcaseOutput *output,
23.16 + gchar **contents,gsize *length,GError **error);
23.17 +gboolean testcase_output_remove(Testcase *testcase,TestcaseOutput *output,
23.18 + GError **error);
23.19 +TestcaseOutput *testcase_output_new(const char *name,const char *contents);
23.20 +void testcase_output_free(TestcaseOutput *output);
23.21 +
23.22 +#endif /* TESTCASE_OUTPUT_H */