Add a testcase for embedded LFs
author ali <ali@juiblex.co.uk>
Fri Jan 27 23:59:51 2012 +0000 (2012-01-27)
changeset 8 cf332d440466
parent 7 721e468c10f3
child 9 6a13fe0fc19e
Add a testcase for embedded LFs
test/compatibility/Makefile.am
test/compatibility/embedded-lf.tst
test/harness/testcase.c
test/harness/testcaseio.c
     1.1 --- a/test/compatibility/Makefile.am	Fri Jan 27 21:40:35 2012 +0000
     1.2 +++ b/test/compatibility/Makefile.am	Fri Jan 27 23:59:51 2012 +0000
     1.3 @@ -2,6 +2,6 @@
     1.4  TESTS=missing-space.tst spaced-punctuation.tst html-tag.tst html-symbol.tst \
     1.5  	spaced-doublequote.tst mismatched-quotes.tst he-be.tst digits.tst \
     1.6  	extra-period.tst ellipsis.tst short-line.tst abbreviation.tst \
     1.7 -	example.tst non-ascii.tst
     1.8 +	example.tst non-ascii.tst embedded-lf.tst
     1.9  
    1.10  dist_pkgdata_DATA=$(TESTS)
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/test/compatibility/embedded-lf.tst	Fri Jan 27 23:59:51 2012 +0000
     2.3 @@ -0,0 +1,10 @@
     2.4 +**************** INPUT ****************
     2.5 +Katherine was assailed by a sudden doubt. Had she mailed that letter? Yes,␊she was certain of that. She had run out to the mail box at ten o'clock
     2.6 +at night especially to mail it. What had gone wrong? Why wasn't there␊someone to meet her?
     2.7 +**************** EXPECTED ****************
     2.8 +
     2.9 +Katherine was assailed by a sudden doubt. Had she mailed that letter? Yes,
    2.10 +    Line 1 - No CR?
    2.11 +
    2.12 +at night especially to mail it. What had gone wrong? Why wasn't there
    2.13 +    Line 3 - No CR?
     3.1 --- a/test/harness/testcase.c	Fri Jan 27 21:40:35 2012 +0000
     3.2 +++ b/test/harness/testcase.c	Fri Jan 27 23:59:51 2012 +0000
     3.3 @@ -3,10 +3,6 @@
     3.4  #include <string.h>
     3.5  #include <unistd.h>
     3.6  #include <errno.h>
     3.7 -#ifdef WIN32
     3.8 -#include <io.h>
     3.9 -#endif
    3.10 -#include <fcntl.h>
    3.11  #include <bl/bl.h>
    3.12  #include "testcase.h"
    3.13  
    3.14 @@ -15,86 +11,12 @@
    3.15      return g_quark_from_static_string("testcase-error-quark");
    3.16  }
    3.17  
    3.18 -#if !HAVE_MKSTEMP
    3.19  /*
    3.20 - * An insecure implementation of mkstemp(), for those platforms that
    3.21 - * don't support it.
    3.22 + * As write(), but with error handling.
    3.23   */
    3.24 -int mkstemp(char *template)
    3.25 +static size_t write_file(int fd,const char *buf,size_t count,GError **error)
    3.26  {
    3.27 -    int fd;
    3.28 -    char *s;
    3.29 -    for(;;)
    3.30 -    {
    3.31 -	s=g_strdup(template);
    3.32 -	mktemp(s);
    3.33 -	if (!*s)
    3.34 -	{
    3.35 -	    errno=EEXIST;
    3.36 -	    g_free(s);
    3.37 -	    return -1;
    3.38 -	}
    3.39 -	fd=open(s,O_RDWR|O_CREAT|O_EXCL,0600);
    3.40 -	if (fd>0)
    3.41 -	{
    3.42 -	    strcpy(template,s);
    3.43 -	    g_free(s);
    3.44 -	    return fd;
    3.45 -	}
    3.46 -	else
    3.47 -	    g_free(s);
    3.48 -    }
    3.49 -}
    3.50 -#endif	/* !HAVE_MKSTEMP */
    3.51 -
    3.52 -/*
    3.53 - * As write(), but always convert NL to CR NL.
    3.54 - */
    3.55 -static size_t write_text(int fd,const char *buf,size_t count,GError **error)
    3.56 -{
    3.57 -    size_t i;
    3.58 -    FILE *fp;
    3.59 -    fd=dup(fd);
    3.60 -    if (fd<0)
    3.61 -	return -1;
    3.62 -#ifdef WIN32
    3.63 -    if (_setmode(fd,_O_BINARY)<0)
    3.64 -    {
    3.65 -	close(fd);
    3.66 -	g_set_error(error,G_FILE_ERROR,g_file_error_from_errno(errno),
    3.67 -	  "Failed to set mode of bookloupe input file to binary: %s",
    3.68 -	  g_strerror(errno));
    3.69 -	return -1;
    3.70 -    }
    3.71 -#endif
    3.72 -    fp=fdopen(fd,"wb");
    3.73 -    if (!fp)
    3.74 -    {
    3.75 -	close(fd);
    3.76 -	g_set_error(error,G_FILE_ERROR,g_file_error_from_errno(errno),
    3.77 -	  "Failed to open stream to bookloupe input file: %s",
    3.78 -	  g_strerror(errno));
    3.79 -	return -1;
    3.80 -    }
    3.81 -    for(i=0;i<count;i++)
    3.82 -    {
    3.83 -	if (buf[i]=='\n')
    3.84 -	    if (putc('\r',fp)==EOF)
    3.85 -	    {
    3.86 -		g_set_error(error,G_FILE_ERROR,g_file_error_from_errno(errno),
    3.87 -		  "Error writing bookloupe input file: %s",g_strerror(errno));
    3.88 -		(void)fclose(fp);
    3.89 -		return -1;
    3.90 -	    }
    3.91 -	if (putc(buf[i],fp)==EOF)
    3.92 -	{
    3.93 -	    g_set_error(error,G_FILE_ERROR,g_file_error_from_errno(errno),
    3.94 -	      "Error writing bookloupe input file: %s",g_strerror(errno));
    3.95 -	    (void)fclose(fp);
    3.96 -	    return -1;
    3.97 -	}
    3.98 -    }
    3.99 -    if (fclose(fp))
   3.100 +    if (write(fd,buf,count)<count)
   3.101      {
   3.102  	g_set_error(error,G_FILE_ERROR,g_file_error_from_errno(errno),
   3.103  	  "Error writing bookloupe input file: %s",g_strerror(errno));
   3.104 @@ -104,20 +26,30 @@
   3.105  }
   3.106  
   3.107  /*
   3.108 - * Return the length (in bytes) or any common prefix between s1 and s2.
   3.109 + * Return the length (in bytes) of any common prefix between s1 and s2.
   3.110 + * The returned length will always represent an exact number of characters.
   3.111   */
   3.112  size_t common_prefix_length(const char *s1,const char *s2)
   3.113  {
   3.114 -    size_t i;
   3.115 -    for(i=0;s1[i] && s2[i] && s1[i]==s2[i];i++)
   3.116 -	;
   3.117 -    return i;
   3.118 +    gunichar c1,c2;
   3.119 +    const char *s=s1;
   3.120 +    while(*s1 && *s2)
   3.121 +    {
   3.122 +	c1=g_utf8_get_char(s1);
   3.123 +	c2=g_utf8_get_char(s2);
   3.124 +	if (c1!=c2)
   3.125 +	    break;
   3.126 +	s1=g_utf8_next_char(s1);
   3.127 +	s2=g_utf8_next_char(s2);
   3.128 +    }
   3.129 +    return s1-s;
   3.130  }
   3.131  
   3.132  void print_unexpected(const char *unexpected,gsize differs_at)
   3.133  {
   3.134      int col;
   3.135 -    const char *endp,*bol;
   3.136 +    gunichar c;
   3.137 +    const char *endp,*bol,*s;
   3.138      GString *string;
   3.139      endp=strchr(unexpected+differs_at,'\n');
   3.140      if (!endp)
   3.141 @@ -128,27 +60,64 @@
   3.142  	bol++;
   3.143      else
   3.144  	bol=string->str;
   3.145 -    col=differs_at-(bol-string->str);
   3.146 +    col=0;
   3.147 +    s=bol;
   3.148 +    endp=string->str+differs_at;
   3.149 +    while(s<endp)
   3.150 +    {
   3.151 +	c=g_utf8_get_char(s);
   3.152 +	s=g_utf8_next_char(s);
   3.153 +	if (c=='\t')
   3.154 +	    col=(col&~7)+8;
   3.155 +	else if (g_unichar_iswide(c))
   3.156 +	    col+=2;
   3.157 +	else if (!g_unichar_iszerowidth(c))
   3.158 +	    col++;
   3.159 +    }
   3.160      fprintf(stderr,"%s\n%*s^\n",string->str,col,"");
   3.161      g_string_free(string,TRUE);
   3.162  }
   3.163  
   3.164 +/*
   3.165 + * Replace \n with \r\n and U+240A (visible symbol for LF) with \n
   3.166 + */
   3.167 +char *unix2dos(const char *text)
   3.168 +{
   3.169 +    gunichar c;
   3.170 +    const gunichar visible_lf=0x240A;
   3.171 +    GString *string;
   3.172 +    string=g_string_new(NULL);
   3.173 +    while(*text)
   3.174 +    {
   3.175 +	c=g_utf8_get_char(text);
   3.176 +	text=g_utf8_next_char(text);
   3.177 +	if (c=='\n')
   3.178 +	    g_string_append(string,"\r\n");
   3.179 +	else if (c==visible_lf)
   3.180 +	    g_string_append_c(string,'\n');
   3.181 +	else
   3.182 +	    g_string_append_unichar(string,c);
   3.183 +    }
   3.184 +    return g_string_free(string,FALSE);
   3.185 +}
   3.186 +
   3.187  gboolean spawn_bootloupe(const char *encoding,const char *standard_input,
   3.188    char **standard_output,char **filename,GError **error)
   3.189  {
   3.190      gboolean r;
   3.191      int fd,exit_status;
   3.192      size_t n,pos,offset;
   3.193 -    FILE *fp;
   3.194      char input[]="TEST-XXXXXX";
   3.195      char *command[3];
   3.196 -    char *output,*s;
   3.197 +    char *output,*s,*t;
   3.198      GError *tmp_err=NULL;
   3.199      if (standard_input)
   3.200      {
   3.201  	if (encoding)
   3.202  	{
   3.203 -	    s=g_convert(standard_input,-1,encoding,"UTF-8",NULL,&n,&tmp_err);
   3.204 +	    t=unix2dos(standard_input);
   3.205 +	    s=g_convert(t,-1,encoding,"UTF-8",NULL,&n,&tmp_err);
   3.206 +	    g_free(t);
   3.207  	    if (!s)
   3.208  	    {
   3.209  		g_propagate_prefixed_error(error,tmp_err,
   3.210 @@ -158,7 +127,7 @@
   3.211  	}
   3.212  	else
   3.213  	{
   3.214 -	    s=g_strdup(standard_input);
   3.215 +	    s=unix2dos(standard_input);
   3.216  	    n=strlen(s);
   3.217  	}
   3.218      }
   3.219 @@ -167,8 +136,8 @@
   3.220  	n=0;
   3.221  	s=NULL;
   3.222      }
   3.223 -    fd=mkstemp(input);
   3.224 -    if (n && write_text(fd,s,n,error)!=n)
   3.225 +    fd=g_mkstemp(input);
   3.226 +    if (n && write_file(fd,s,n,error)!=n)
   3.227      {
   3.228  	g_free(s);
   3.229  	close(fd);
     4.1 --- a/test/harness/testcaseio.c	Fri Jan 27 21:40:35 2012 +0000
     4.2 +++ b/test/harness/testcaseio.c	Fri Jan 27 23:59:51 2012 +0000
     4.3 @@ -39,7 +39,7 @@
     4.4  	else if (!testcase->expected && !strcmp(tag,"EXPECTED"))
     4.5  	    testcase->expected=g_strdup(text);
     4.6  	else if (!testcase->encoding && !strcmp(tag,"ENCODING"))
     4.7 -	    testcase->encoding=g_strdup(text);
     4.8 +	    testcase->encoding=g_strchomp(g_strdup(text));
     4.9  	else
    4.10  	{
    4.11  	    fprintf(stderr,"%s: Not a valid testcase (%s)\n",filename,tag);