Fix bug #13: Character sets
author ali <ali@juiblex.co.uk>
Sun Oct 27 17:01:47 2013 +0000 (2013-10-27)
changeset 103 d22d8cd4f628
parent 102 ff0aa9b1397a
child 104 70cc629ec1e0
Fix bug #13: Character sets
bookloupe/bookloupe.c
sample.ini
test/bookloupe/Makefile.am
test/bookloupe/charset-cp1252.tst
test/bookloupe/charset-latin1.tst
test/bookloupe/config-default.tst
test/bookloupe/config-internal.tst
test/bookloupe/config-override.tst
test/bookloupe/config-user.tst
     1.1 --- a/bookloupe/bookloupe.c	Fri Oct 25 11:15:18 2013 +0100
     1.2 +++ b/bookloupe/bookloupe.c	Sun Oct 27 17:01:47 2013 +0000
     1.3 @@ -32,6 +32,9 @@
     1.4  #include "pending.h"
     1.5  #include "HTMLentities.h"
     1.6  
     1.7 +gchar *charset;		/* Or NULL for auto (ISO_8859-1/ASCII or UNICODE) */
     1.8 +GIConv charset_validator=(GIConv)-1;
     1.9 +
    1.10  gchar *prevline;
    1.11  
    1.12  /* Common typos. */
    1.13 @@ -127,6 +130,7 @@
    1.14  }; 
    1.15  
    1.16  gboolean pswit[SWITNO];  /* program switches */
    1.17 +gchar *opt_charset;
    1.18  
    1.19  gboolean typo_compat,paranoid_compat;
    1.20  
    1.21 @@ -198,6 +202,8 @@
    1.22      { "no-verbose", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_REVERSE,
    1.23        G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
    1.24        "Switch off verbose mode", NULL },
    1.25 +    { "charset", 0, 0, G_OPTION_ARG_STRING, &opt_charset,
    1.26 +      "Set of characters valid for this ebook", "NAME" },
    1.27      { NULL }
    1.28  };
    1.29  
    1.30 @@ -262,11 +268,55 @@
    1.31  UINT saved_cp;
    1.32  #endif
    1.33  
    1.34 +gboolean set_charset(const char *name,GError **err)
    1.35 +{
    1.36 +    /* The various UNICODE encodings all share the same character set. */
    1.37 +    const char *unicode_aliases[]={ "UCS-2", "UCS-2BE", "UCS-2LE", "UCS-4",
    1.38 +      "UCS-4BE", "UCS-4LE", "UCS2", "UCS4", "UNICODE", "UNICODEBIG",
    1.39 +      "UNICODELITTLE", "UTF-7", "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
    1.40 +      "UTF-32", "UTF-32BE", "UTF-32LE", "UTF7", "UTF8", "UTF16", "UTF16BE",
    1.41 +      "UTF16LE", "UTF32", "UTF32BE", "UTF32LE" };
    1.42 +    int i;
    1.43 +    if (charset)
    1.44 +	g_free(charset);
    1.45 +    if (charset_validator!=(GIConv)-1)
    1.46 +	g_iconv_close(charset_validator);
    1.47 +    if (!name || !g_strcasecmp(name,"auto"))
    1.48 +    {
    1.49 +	charset=NULL;
    1.50 +	charset_validator=(GIConv)-1;
    1.51 +	return TRUE;
    1.52 +    }
    1.53 +    else
    1.54 +	charset=g_strdup(name);
    1.55 +    for(i=0;i<G_N_ELEMENTS(unicode_aliases);i++)
    1.56 +	if (!g_strcasecmp(charset,unicode_aliases[i]))
    1.57 +	{
    1.58 +	    g_free(charset);
    1.59 +	    charset=g_strdup("UTF-8");
    1.60 +	    break;
    1.61 +	}
    1.62 +    if (!strcmp(charset,"UTF-8"))
    1.63 +	charset_validator=(GIConv)-1;
    1.64 +    else
    1.65 +    {
    1.66 +	charset_validator=g_iconv_open(charset,"UTF-8");
    1.67 +	if (charset_validator==(GIConv)-1)
    1.68 +	{
    1.69 +	    g_set_error(err,G_CONVERT_ERROR,G_CONVERT_ERROR_NO_CONVERSION,
    1.70 +	      "Unknown character set \"%s\"",charset);
    1.71 +	    return FALSE;
    1.72 +	}
    1.73 +    }
    1.74 +    return TRUE;
    1.75 +}
    1.76 +
    1.77  GKeyFile *config;
    1.78  
    1.79  void config_file_update(GKeyFile *kf)
    1.80  {
    1.81      int i;
    1.82 +    const char *s;
    1.83      gboolean sw;
    1.84      for(i=0;options[i].long_name;i++)
    1.85      {
    1.86 @@ -279,6 +329,13 @@
    1.87  		sw=!sw;
    1.88  	    g_key_file_set_boolean(kf,"options",options[i].long_name,sw);
    1.89  	}
    1.90 +	else if (options[i].arg==G_OPTION_ARG_STRING)
    1.91 +	{
    1.92 +	    s=*(gchar **)options[i].arg_data;
    1.93 +	    if (!s)
    1.94 +		s="auto";
    1.95 +	    g_key_file_set_string(kf,"options",options[i].long_name,s);
    1.96 +	}
    1.97  	else
    1.98  	    g_assert_not_reached();
    1.99      }
   1.100 @@ -375,7 +432,7 @@
   1.101  void parse_config_file(void)
   1.102  {
   1.103      int i,j;
   1.104 -    gchar *path;
   1.105 +    gchar *path,*s;
   1.106      gchar **keys;
   1.107      gboolean sw;
   1.108      GError *err=NULL;
   1.109 @@ -404,9 +461,35 @@
   1.110  			      path,keys[i],err->message);
   1.111  			    g_clear_error(&err);
   1.112  			}
   1.113 -			if (options[j].flags&G_OPTION_FLAG_REVERSE)
   1.114 -			    sw=!sw;
   1.115 -			*(gboolean *)options[j].arg_data=sw;
   1.116 +			else
   1.117 +			{
   1.118 +			    if (options[j].flags&G_OPTION_FLAG_REVERSE)
   1.119 +				sw=!sw;
   1.120 +			    *(gboolean *)options[j].arg_data=sw;
   1.121 +			}
   1.122 +			break;
   1.123 +		    }
   1.124 +		    else if (options[j].arg==G_OPTION_ARG_STRING)
   1.125 +		    {
   1.126 +			s=g_key_file_get_string(config,"options",keys[i],
   1.127 +			  &err);
   1.128 +			if (err)
   1.129 +			{
   1.130 +			    g_printerr("Bookloupe: %s: options.%s: %s\n",
   1.131 +			      path,keys[i],err->message);
   1.132 +			    g_clear_error(&err);
   1.133 +			}
   1.134 +			else
   1.135 +			{
   1.136 +			    g_free(*(gchar **)options[j].arg_data);
   1.137 +			    if (!g_strcmp0(s,"auto"))
   1.138 +			    {
   1.139 +				*(gchar **)options[j].arg_data=NULL;
   1.140 +				g_free(s);
   1.141 +			    }
   1.142 +			    else
   1.143 +				*(gchar **)options[j].arg_data=s;
   1.144 +			}
   1.145  			break;
   1.146  		    }
   1.147  		    else
   1.148 @@ -475,11 +558,18 @@
   1.149  	pswit[USERTYPO_SWITCH]=FALSE;
   1.150  	pswit[DP_SWITCH]=FALSE;
   1.151      }
   1.152 +    if (opt_charset && !set_charset(opt_charset,&err))
   1.153 +    {
   1.154 +	g_printerr("%s\n",err->message);
   1.155 +	exit(1);
   1.156 +    }
   1.157      if (pswit[DUMP_CONFIG_SWITCH])
   1.158      {
   1.159  	dump_config();
   1.160  	exit(0);
   1.161      }
   1.162 +    g_free(opt_charset);
   1.163 +    opt_charset=NULL;
   1.164      if (pswit[OVERVIEW_SWITCH])
   1.165  	/* just print summary; don't echo */
   1.166  	pswit[ECHO_SWITCH]=FALSE;
   1.167 @@ -542,7 +632,11 @@
   1.168  	exit(1);
   1.169      }
   1.170      if (g_utf8_validate(contents,len,NULL))
   1.171 +    {
   1.172  	utf8=g_utf8_normalize(contents,len,G_NORMALIZE_DEFAULT_COMPOSE);
   1.173 +	if (!charset)
   1.174 +	    (void)set_charset("UNICODE",NULL);
   1.175 +    }
   1.176      else
   1.177  	utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
   1.178      g_free(contents);
   1.179 @@ -674,6 +768,7 @@
   1.180      g_free(running_from);
   1.181      if (usertypo)
   1.182  	g_tree_unref(usertypo);
   1.183 +    set_charset(NULL,NULL);
   1.184      if (config)
   1.185  	g_key_file_free(config);
   1.186      return 0;
   1.187 @@ -1024,25 +1119,32 @@
   1.188  	  "Not reporting them.\n",
   1.189  	  results->spacedash+results->emdash.non_PG_space);
   1.190      }
   1.191 -    /* If more than a quarter of characters are hi-bit, bug out. */
   1.192 -    warnings.bin=1;
   1.193 -    if (results->binlen*4>results->totlen)
   1.194 +    if (charset)
   1.195 +	warnings.bin=0;
   1.196 +    else
   1.197      {
   1.198 -	g_print("   --> This file does not appear to be ASCII. "
   1.199 -	  "Terminating. Best of luck with it!\n");
   1.200 -	exit(1);
   1.201 -    }
   1.202 -    if (results->alphalen*4<results->totlen)
   1.203 -    {
   1.204 -	g_print("   --> This file does not appear to be text. "
   1.205 -	  "Terminating. Best of luck with it!\n");
   1.206 -	exit(1);
   1.207 -    }
   1.208 -    if (results->binlen*100>results->totlen || results->binlen>100)
   1.209 -    {
   1.210 -	g_print("   --> There are a lot of foreign letters here. "
   1.211 -	  "Not reporting them.\n");
   1.212 -	warnings.bin=0;
   1.213 +	/* Charset ISO_8859-1/ASCII checks for compatibility with gutcheck */
   1.214 +	warnings.bin=1;
   1.215 +	/* If more than a quarter of characters are hi-bit, bug out. */
   1.216 +	if (results->binlen*4>results->totlen)
   1.217 +	{
   1.218 +	    g_print("   --> This file does not appear to be ASCII. "
   1.219 +	      "Terminating. Best of luck with it!\n");
   1.220 +	    exit(1);
   1.221 +	}
   1.222 +	if (results->alphalen*4<results->totlen)
   1.223 +	{
   1.224 +	    g_print("   --> This file does not appear to be text. "
   1.225 +	      "Terminating. Best of luck with it!\n");
   1.226 +	    exit(1);
   1.227 +	}
   1.228 +	if (results->binlen*100>results->totlen || results->binlen>100)
   1.229 +	{
   1.230 +	    g_print("   --> There are a lot of foreign letters here. "
   1.231 +	      "Not reporting them.\n");
   1.232 +	    if (!pswit[VERBOSE_SWITCH])
   1.233 +		warnings.bin=0;
   1.234 +	}
   1.235      }
   1.236      warnings.isDutch=FALSE;
   1.237      if (results->Dutchcount>50)
   1.238 @@ -1070,7 +1172,6 @@
   1.239      g_print("\n");
   1.240      if (pswit[VERBOSE_SWITCH])
   1.241      {
   1.242 -	warnings.bin=1;
   1.243  	warnings.shortline=1;
   1.244  	warnings.dotcomma=1;
   1.245  	warnings.longline=1;
   1.246 @@ -1265,14 +1366,17 @@
   1.247    gboolean isemptyline)
   1.248  {
   1.249      /* Don't repeat multiple warnings on one line. */
   1.250 -    gboolean eNon_A=FALSE,eTab=FALSE,eTilde=FALSE;
   1.251 +    gboolean eInvalidChar=FALSE,eTab=FALSE,eTilde=FALSE;
   1.252      gboolean eCarat=FALSE,eFSlash=FALSE,eAst=FALSE;
   1.253      const char *s;
   1.254      gunichar c;
   1.255 +    gsize nb;
   1.256 +    gchar *t;
   1.257      for (s=aline;*s;s=g_utf8_next_char(s))
   1.258      {
   1.259  	c=g_utf8_get_char(s);
   1.260 -	if (!eNon_A && (c<CHAR_SPACE && c!='\t' && c!='\n' || c>127))
   1.261 +	if (warnings->bin && !eInvalidChar &&
   1.262 +	  (c<CHAR_SPACE && c!='\t' && c!='\n' || c>127))
   1.263  	{
   1.264  	    if (pswit[ECHO_SWITCH])
   1.265  		g_print("\n%s\n",aline);
   1.266 @@ -1287,7 +1391,57 @@
   1.267  		      linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
   1.268  	    else
   1.269  		cnt_bin++;
   1.270 -	    eNon_A=TRUE;
   1.271 +	    eInvalidChar=TRUE;
   1.272 +	}
   1.273 +	if (!eInvalidChar && charset)
   1.274 +	{
   1.275 +	    if (charset_validator==(GIConv)-1)
   1.276 +	    {
   1.277 +		if (!g_unichar_isdefined(c))
   1.278 +		{
   1.279 +		    if (pswit[ECHO_SWITCH])
   1.280 +			g_print("\n%s\n",aline);
   1.281 +		    if (!pswit[OVERVIEW_SWITCH])
   1.282 +			g_print("    Line %ld column %ld - Unassigned UNICODE "
   1.283 +			  "code point U+%04" G_GINT32_MODIFIER "X\n",
   1.284 +			  linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
   1.285 +		    else
   1.286 +			cnt_bin++;
   1.287 +		    eInvalidChar=TRUE;
   1.288 +		}
   1.289 +		else if (c>=0xE000 && c<=0xF8FF || c>=0xF0000 && c<=0xFFFFD ||
   1.290 +		  c>=100000 && c<=0x10FFFD)
   1.291 +		{
   1.292 +		    if (pswit[ECHO_SWITCH])
   1.293 +			g_print("\n%s\n",aline);
   1.294 +		    if (!pswit[OVERVIEW_SWITCH])
   1.295 +			g_print("    Line %ld column %ld - Private Use "
   1.296 +			  "character U+%04" G_GINT32_MODIFIER "X\n",
   1.297 +			  linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
   1.298 +		    else
   1.299 +			cnt_bin++;
   1.300 +		    eInvalidChar=TRUE;
   1.301 +		}
   1.302 +	    }
   1.303 +	    else
   1.304 +	    {
   1.305 +		t=g_convert_with_iconv(s,g_utf8_next_char(s)-s,
   1.306 +		  charset_validator,NULL,&nb,NULL);
   1.307 +		if (t)
   1.308 +		    g_free(t);
   1.309 +		else
   1.310 +		{
   1.311 +		    if (pswit[ECHO_SWITCH])
   1.312 +			g_print("\n%s\n",aline);
   1.313 +		    if (!pswit[OVERVIEW_SWITCH])
   1.314 +			g_print("    Line %ld column %ld - Non-%s "
   1.315 +			  "character %u\n",linecnt,
   1.316 +			  g_utf8_pointer_to_offset(aline,s)+1,charset,c);
   1.317 +		    else
   1.318 +			cnt_bin++;
   1.319 +		    eInvalidChar=TRUE;
   1.320 +		}
   1.321 +	    }
   1.322  	}
   1.323  	if (!eTab && c==CHAR_TAB)
   1.324  	{
   1.325 @@ -2975,8 +3129,7 @@
   1.326  	if (s>=aline && g_utf8_get_char(s)=='-')
   1.327  	    enddash=TRUE;
   1.328  	check_for_control_characters(aline);
   1.329 -	if (warnings->bin)
   1.330 -	    check_for_odd_characters(aline,warnings,isemptyline);
   1.331 +	check_for_odd_characters(aline,warnings,isemptyline);
   1.332  	if (warnings->longline)
   1.333  	    check_for_long_line(aline);
   1.334  	if (warnings->shortline)
     2.1 --- a/sample.ini	Fri Oct 25 11:15:18 2013 +0100
     2.2 +++ b/sample.ini	Sun Oct 27 17:01:47 2013 +0000
     2.3 @@ -29,3 +29,5 @@
     2.4  web=false
     2.5  # Verbose - list everything
     2.6  verbose=false
     2.7 +# Set of characters valid for this ebook
     2.8 +charset=auto
     3.1 --- a/test/bookloupe/Makefile.am	Fri Oct 25 11:15:18 2013 +0100
     3.2 +++ b/test/bookloupe/Makefile.am	Sun Oct 27 17:01:47 2013 +0000
     3.3 @@ -2,7 +2,7 @@
     3.4  TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst curved-quotes.tst \
     3.5  	runfox-quotes.tst curved-genitives.tst multi-line-illustration.tst \
     3.6  	emdash.tst config-internal.tst config-default.tst config-user.tst \
     3.7 -	config-override.tst footnote-marker.tst unix-lineends.tst \
     3.8 -	os9-lineends.tst
     3.9 +	config-override.tst charset-cp1252.tst charset-latin1.tst \
    3.10 +	footnote-marker.tst unix-lineends.tst os9-lineends.tst
    3.11  
    3.12  dist_pkgdata_DATA=$(TESTS)
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/test/bookloupe/charset-cp1252.tst	Sun Oct 27 17:01:47 2013 +0000
     4.3 @@ -0,0 +1,16 @@
     4.4 +**************** OPTIONS ****************
     4.5 +--charset=WINDOWS-1252
     4.6 +**************** ENCODING ****************
     4.7 +WINDOWS-1252
     4.8 +**************** INPUT ****************
     4.9 +Unless binary mode is engaged, gutcheck will warn about a number of
    4.10 +characters defined in Windows-1252. Bookloupe provides support for
    4.11 +disabling such checks without concern as to the file size and how
    4.12 +many characters with the eighth bit set it may contain by allowing a
    4.13 +character set to be declared. With the character set declared as
    4.14 +WINDOWS-1252, all characters defined in Windows-1252 should be acceptable
    4.15 +and no warnings should be issued.
    4.16 +
    4.17 +We test for this by including just one such character—the em dash.
    4.18 +
    4.19 +**************** EXPECTED ****************
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/test/bookloupe/charset-latin1.tst	Sun Oct 27 17:01:47 2013 +0000
     5.3 @@ -0,0 +1,58 @@
     5.4 +**************** OPTIONS ****************
     5.5 +--charset=ISO-8859-1
     5.6 +**************** ENCODING ****************
     5.7 +WINDOWS-1252
     5.8 +**************** INPUT ****************
     5.9 +Where the character set declared is narrower than the character set
    5.10 +implied by the encoding as in this case (Windows-1252 is a superset
    5.11 +of the first latin alphabet defined in ECMA 94), then bookloupe should
    5.12 +warn about characters that are not in the declared character set but
    5.13 +should still recognise them and otherwise handle them as it would
    5.14 +normally do. We use the curved apostrophe as a test for this since
    5.15 +if bookloupe didn't recognise it then it would query the orphaned
    5.16 +letters from the genitives and abbreviations.
    5.17 +
    5.18 +John Hendricks was bear-leading at the time. He had originally studied
    5.19 +for Holy Orders, but had abandoned the Church later for private reasons
    5.20 +connected with his faith, and had taken to teaching and tutoring
    5.21 +instead. He was an honest, upstanding fellow of five-and-thirty,
    5.22 +incorruptible, intelligent in a simple, straightforward way. He played
    5.23 +games with his head, more than most Englishmen do, but he went through
    5.24 +life without much calculation. He had qualities that made boys like
    5.25 +and respect him; he won their confidence. Poor, proud, ambitious,
    5.26 +he realised that fate offered him a chance when the Secretary of
    5.27 +State for Scotland asked him if he would give up his other pupils
    5.28 +for a year and take his son, Lord Ernie, round the world upon an
    5.29 +educational trip that might make a man of him. For Lord Ernie was the
    5.30 +only son, and the Marquess’s influence was naturally great. To have
    5.31 +deposited a regenerated Lord Ernie at the castle gates might have
    5.32 +guaranteed Hendricks’ future. After leaving Eton prematurely the lad
    5.33 +had come under Hendricks’ charge for a time, and with such excellent
    5.34 +results--‘I’d simply swear by that chap, you know,’ the boy used
    5.35 +to say--that his father, considerably impressed, and rather as a
    5.36 +last resort, had made this proposition. And Hendricks, without much
    5.37 +calculation, had accepted it. He liked ‘Bindy’ for himself. It was
    5.38 +in his heart to ‘make a man of him,’ if possible. They had now been
    5.39 +round the world together and had come up from Brindisi to the Italian
    5.40 +Lakes, and so into Switzerland. It was middle October. With a week or
    5.41 +two to spare they were making leisurely for the ancestral halls in
    5.42 +Aberdeenshire.
    5.43 +**************** EXPECTED ****************
    5.44 +
    5.45 +only son, and the Marquess’s influence was naturally great. To have
    5.46 +    Line 22 column 27 - Non-ISO-8859-1 character 8217
    5.47 +
    5.48 +guaranteed Hendricks’ future. After leaving Eton prematurely the lad
    5.49 +    Line 24 column 21 - Non-ISO-8859-1 character 8217
    5.50 +
    5.51 +had come under Hendricks’ charge for a time, and with such excellent
    5.52 +    Line 25 column 25 - Non-ISO-8859-1 character 8217
    5.53 +
    5.54 +results--‘I’d simply swear by that chap, you know,’ the boy used
    5.55 +    Line 26 column 10 - Non-ISO-8859-1 character 8216
    5.56 +
    5.57 +calculation, had accepted it. He liked ‘Bindy’ for himself. It was
    5.58 +    Line 29 column 40 - Non-ISO-8859-1 character 8216
    5.59 +
    5.60 +in his heart to ‘make a man of him,’ if possible. They had now been
    5.61 +    Line 30 column 17 - Non-ISO-8859-1 character 8216
     6.1 --- a/test/bookloupe/config-default.tst	Fri Oct 25 11:15:18 2013 +0100
     6.2 +++ b/test/bookloupe/config-default.tst	Sun Oct 27 17:01:47 2013 +0000
     6.3 @@ -30,6 +30,8 @@
     6.4  usertypo=false
     6.5  # Verbose - list everything
     6.6  verbose=false
     6.7 +# Set of characters valid for this ebook
     6.8 +charset=auto
     6.9  **************** EXPECTED(stdout) ****************
    6.10  # Default configuration for bookloupe
    6.11  
    6.12 @@ -60,3 +62,5 @@
    6.13  usertypo=false
    6.14  # Verbose - list everything
    6.15  verbose=false
    6.16 +# Set of characters valid for this ebook
    6.17 +charset=auto
     7.1 --- a/test/bookloupe/config-internal.tst	Fri Oct 25 11:15:18 2013 +0100
     7.2 +++ b/test/bookloupe/config-internal.tst	Sun Oct 27 17:01:47 2013 +0000
     7.3 @@ -30,3 +30,5 @@
     7.4  usertypo=false
     7.5  # Verbose - list everything
     7.6  verbose=false
     7.7 +# Set of characters valid for this ebook
     7.8 +charset=auto
     8.1 --- a/test/bookloupe/config-override.tst	Fri Oct 25 11:15:18 2013 +0100
     8.2 +++ b/test/bookloupe/config-override.tst	Sun Oct 27 17:01:47 2013 +0000
     8.3 @@ -1,5 +1,6 @@
     8.4  **************** OPTIONS ****************
     8.5  --usertypo
     8.6 +--charset=auto
     8.7  --dump-config
     8.8  **************** INPUT(bookloupe.ini) ****************
     8.9  # Relaxed configuration for bookloupe
    8.10 @@ -31,6 +32,8 @@
    8.11  usertypo=false
    8.12  # Verbose - list everything
    8.13  verbose=false
    8.14 +# Set of characters valid for this ebook
    8.15 +charset=UNICODE
    8.16  **************** EXPECTED(stdout) ****************
    8.17  # Relaxed configuration for bookloupe
    8.18  
    8.19 @@ -61,3 +64,5 @@
    8.20  usertypo=true
    8.21  # Verbose - list everything
    8.22  verbose=false
    8.23 +# Set of characters valid for this ebook
    8.24 +charset=auto
     9.1 --- a/test/bookloupe/config-user.tst	Fri Oct 25 11:15:18 2013 +0100
     9.2 +++ b/test/bookloupe/config-user.tst	Sun Oct 27 17:01:47 2013 +0000
     9.3 @@ -35,6 +35,8 @@
     9.4  usertypo=true
     9.5  # Verbose - list everything - Contrary by name...
     9.6  verbose=true
     9.7 +# Set of characters valid for this ebook - Let's stick with Latin1
     9.8 +charset=ISO-8859-1
     9.9  **************** EXPECTED(stdout) ****************
    9.10  # Mary Contrary's configuration for bookloupe
    9.11  
    9.12 @@ -70,3 +72,5 @@
    9.13  usertypo=true
    9.14  # Verbose - list everything - Contrary by name...
    9.15  verbose=true
    9.16 +# Set of characters valid for this ebook - Let's stick with Latin1
    9.17 +charset=ISO-8859-1