Skip UTF-8 BOM in .tst files
author ali <ali@juiblex.co.uk>
Tue Jan 31 01:07:38 2012 +0000 (2012-01-31)
changeset 13 eca715c100fe
parent 12 84459e0f099b
child 14 3c57a6fd0a13
Skip UTF-8 BOM in .tst files
bl/textfileutils.c
     1.1 --- a/bl/textfileutils.c	Mon Jan 30 23:32:57 2012 +0000
     1.2 +++ b/bl/textfileutils.c	Tue Jan 31 01:07:38 2012 +0000
     1.3 @@ -6,25 +6,28 @@
     1.4   * Read a file into memory (which should be freed with mem_free when no
     1.5   * longer required). Returns NULL on error and outputs a suitable error
     1.6   * message to stderr.
     1.7 - * DOS-style line endings are handled transparently even on platforms which
     1.8 - * don't normally use this format.
     1.9 + * DOS-style line endings and UTF-8 BOM are handled transparently even
    1.10 + * on platforms which don't normally use these formats.
    1.11   */
    1.12  gboolean file_get_contents_text(const char *filename,char **contents,
    1.13    size_t *length)
    1.14  {
    1.15      int i;
    1.16 -    char *raw;
    1.17 +    unsigned char *raw;
    1.18      size_t raw_length;
    1.19      GString *string;
    1.20      GError *error=NULL;
    1.21 -    if (!g_file_get_contents(filename,&raw,&raw_length,&error))
    1.22 +    if (!g_file_get_contents(filename,(char *)&raw,&raw_length,&error))
    1.23      {
    1.24  	fprintf(stderr,"%s: %s\n",filename,error->message);
    1.25  	g_error_free(error);
    1.26  	return FALSE;
    1.27      }
    1.28      string=g_string_new(NULL);
    1.29 -    for(i=0;i<raw_length;i++)
    1.30 +    i=0;
    1.31 +    if (raw_length>=3 && raw[0]==0xEF && raw[1]==0xBB && raw[2]==0xBF)
    1.32 +	i+=3;			/* Skip BOM (U+FEFF) */
    1.33 +    for(;i<raw_length;i++)
    1.34  	if (raw[i]!='\r')
    1.35  	    g_string_append_c(string,raw[i]);
    1.36      g_free(raw);