Fix bug #24: Accept alternate form of newline
author ali <ali@juiblex.co.uk>
Sat Oct 26 18:47:33 2013 +0100 (2013-10-26)
changeset 101 f44c530f80da
parent 100 ad92d11d59b8
child 102 ff0aa9b1397a
Fix bug #24: Accept alternate form of newline
bookloupe/bookloupe.c
bookloupe/bookloupe.h
doc/loupe-test.txt
test/bookloupe/Makefile.am
test/bookloupe/os9-lineends.tst
test/bookloupe/unix-lineends.tst
test/harness/testcase.c
test/harness/testcase.h
test/harness/testcaseinput.c
test/harness/testcaseio.c
test/harness/warningsparser.c
     1.1 --- a/bookloupe/bookloupe.c	Tue Oct 15 09:16:04 2013 +0100
     1.2 +++ b/bookloupe/bookloupe.c	Sat Oct 26 18:47:33 2013 +0100
     1.3 @@ -183,7 +183,7 @@
     1.4  
     1.5  gboolean mixdigit(const char *);
     1.6  gchar *getaword(const char **);
     1.7 -char *flgets(char **,long);
     1.8 +char *flgets(char **,long,int);
     1.9  void postprocess_for_HTML(char *);
    1.10  char *linehasmarkup(char *);
    1.11  char *losemarkup(char *);
    1.12 @@ -487,11 +487,40 @@
    1.13      gchar *inword;
    1.14      QuoteClass qc;
    1.15      lines=g_strsplit(etext,"\n",0);
    1.16 +    if (!lines[0])
    1.17 +    {
    1.18 +	/* An empty etext has no terminators */
    1.19 +	results.newlines=DOS_NEWLINES;
    1.20 +    }
    1.21 +    else if (!lines[1])
    1.22 +    {
    1.23 +	/*
    1.24 +	 * If there are no LFs, we don't have UNIX-style
    1.25 +	 * terminators, but we might have OS9-style ones.
    1.26 +	 */
    1.27 +	results.newlines=OS9_NEWLINES;
    1.28 +	g_strfreev(lines);
    1.29 +	lines=g_strsplit(etext,"\r",0);
    1.30 +	if (!lines[0] || !lines[1])
    1.31 +	    /* Looks like we don't have any terminators at all */
    1.32 +	    results.newlines=DOS_NEWLINES;
    1.33 +    }
    1.34 +    else
    1.35 +    {
    1.36 +	/* We might have UNIX-style terminators */
    1.37 +	results.newlines=UNIX_NEWLINES;
    1.38 +    }
    1.39      for (j=0;lines[j];j++)
    1.40      {
    1.41  	lbytes=strlen(lines[j]);
    1.42 -	while (lbytes>0 && lines[j][lbytes-1]=='\r')
    1.43 -	    lines[j][--lbytes]='\0';
    1.44 +	if (lbytes>0 && lines[j][lbytes-1]=='\r')
    1.45 +	{
    1.46 +	    results.newlines=DOS_NEWLINES;
    1.47 +	    do
    1.48 +	    {
    1.49 +		lines[j][--lbytes]='\0';
    1.50 +	    } while (lbytes>0 && lines[j][lbytes-1]=='\r');
    1.51 +	}
    1.52  	llen=g_utf8_strlen(lines[j],lbytes);
    1.53  	linecnt++;
    1.54  	if (strstr(lines[j],"*END") && strstr(lines[j],"SMALL PRINT") &&
    1.55 @@ -633,6 +662,13 @@
    1.56  struct warnings *report_first_pass(struct first_pass_results *results)
    1.57  {
    1.58      static struct warnings warnings={0};
    1.59 +    warnings.newlines=results->newlines;
    1.60 +    if (warnings.newlines==UNIX_NEWLINES)
    1.61 +	g_print("   --> No lines in this file have a CR. Not reporting them. "
    1.62 +	  "Project Gutenberg requires that all lineends be CR-LF.\n");
    1.63 +    else if (warnings.newlines==OS9_NEWLINES)
    1.64 +	g_print("   --> No lines in this file have a LF. Not reporting them. "
    1.65 +	  "Project Gutenberg requires that all lineends be CR-LF.\n");
    1.66      if (cnt_spacend>0)
    1.67  	g_print("   --> %ld lines in this file have white space at end\n",
    1.68  	  cnt_spacend);
    1.69 @@ -2621,7 +2657,7 @@
    1.70       */
    1.71      linecnt=0;
    1.72      etext_ptr=etext;
    1.73 -    while ((aline=flgets(&etext_ptr,linecnt+1)))
    1.74 +    while ((aline=flgets(&etext_ptr,linecnt+1,warnings->newlines)))
    1.75      {
    1.76  	linecnt++;
    1.77  	if (linecnt==1)
    1.78 @@ -2762,12 +2798,21 @@
    1.79  /*
    1.80   * flgets:
    1.81   *
    1.82 - * Get one line from the input text, checking for
    1.83 - * the existence of exactly one CR/LF line-end per line.
    1.84 + * Get one line from the input text. The setting of newlines has the following
    1.85 + * effect:
    1.86 + *
    1.87 + * DOS_NEWLINES: Check for the existence of exactly one CR-LF line-end per line.
    1.88 + *
    1.89 + * OS9_NEWLINES: Asserts that etext contains no LFs. CR is used as
    1.90 + *		 the newline character.
    1.91 + *
    1.92 + * UNIX_NEWLINES: Check for the presence of CRs.
    1.93 + *
    1.94 + * In all cases, check that the last line is correctly terminated.
    1.95   *
    1.96   * Returns: a pointer to the line.
    1.97   */
    1.98 -char *flgets(char **etext,long lcnt)
    1.99 +char *flgets(char **etext,long lcnt,int newlines)
   1.100  {
   1.101      gunichar c;
   1.102      gboolean isCR=FALSE;
   1.103 @@ -2790,8 +2835,15 @@
   1.104  		    g_free(s);
   1.105  		}
   1.106  		if (!pswit[OVERVIEW_SWITCH])
   1.107 -		    /* There may, or may not, have been a CR */
   1.108 -		    g_print("    Line %ld - No LF?\n",lcnt);
   1.109 +		{
   1.110 +		    if (newlines==OS9_NEWLINES)
   1.111 +			g_print("    Line %ld - No CR?\n",lcnt);
   1.112 +		    else
   1.113 +		    {
   1.114 +			/* There may, or may not, have been a CR */
   1.115 +			g_print("    Line %ld - No LF?\n",lcnt);
   1.116 +		    }
   1.117 +		}
   1.118  		else
   1.119  		    cnt_lineend++;
   1.120  	    }
   1.121 @@ -2801,9 +2853,7 @@
   1.122  	/* either way, it's end of line */
   1.123  	if (c=='\n')
   1.124  	{
   1.125 -	    if (isCR)
   1.126 -		break;
   1.127 -	    else
   1.128 +	    if (newlines==DOS_NEWLINES && !isCR)
   1.129  	    {
   1.130  		/* Error - a LF without a preceding CR */
   1.131  		if (pswit[LINE_END_SWITCH])
   1.132 @@ -2819,14 +2869,15 @@
   1.133  		    else
   1.134  			cnt_lineend++;
   1.135  		}
   1.136 -		break;
   1.137  	    }
   1.138 +	    break;
   1.139  	}
   1.140  	if (c=='\r')
   1.141  	{
   1.142 -	    if (isCR)
   1.143 +	    if (newlines==OS9_NEWLINES)
   1.144 +		break;
   1.145 +	    if (isCR || newlines==UNIX_NEWLINES)
   1.146  	    {
   1.147 -		/* Error - two successive CRs */
   1.148  		if (pswit[LINE_END_SWITCH])
   1.149  		{
   1.150  		    if (pswit[ECHO_SWITCH])
   1.151 @@ -2836,12 +2887,22 @@
   1.152  			g_free(s);
   1.153  		    }
   1.154  		    if (!pswit[OVERVIEW_SWITCH])
   1.155 -			g_print("    Line %ld - Two successive CRs?\n",lcnt);
   1.156 +		    {
   1.157 +			if (newlines==UNIX_NEWLINES)
   1.158 +			    g_print("    Line %ld column %ld - Embedded CR?\n",
   1.159 +			      lcnt,g_utf8_pointer_to_offset(theline,eos)+1);
   1.160 +			else
   1.161 +			    g_print("    Line %ld - Two successive CRs?\n",
   1.162 +			      lcnt);
   1.163 +		    }
   1.164  		    else
   1.165  			cnt_lineend++;
   1.166  		}
   1.167 +		if (newlines==UNIX_NEWLINES)
   1.168 +		    *eos=' ';
   1.169  	    }
   1.170 -	    isCR=TRUE;
   1.171 +	    if (newlines==DOS_NEWLINES)
   1.172 +		isCR=TRUE;
   1.173  	}
   1.174  	else
   1.175  	{
     2.1 --- a/bookloupe/bookloupe.h	Tue Oct 15 09:16:04 2013 +0100
     2.2 +++ b/bookloupe/bookloupe.h	Sat Oct 26 18:47:33 2013 +0100
     2.3 @@ -58,6 +58,12 @@
     2.4      SWITNO
     2.5  };
     2.6  
     2.7 +enum {
     2.8 +    DOS_NEWLINES,
     2.9 +    UNIX_NEWLINES,
    2.10 +    OS9_NEWLINES,
    2.11 +};
    2.12 +
    2.13  struct dash_results {
    2.14      long base,space,non_PG_space,PG_space;
    2.15  };
    2.16 @@ -68,12 +74,13 @@
    2.17      long fslashline,hyphens,longline,verylongline,htmcount,standalone_digit;
    2.18      long spacedash;
    2.19      struct dash_results emdash;
    2.20 +    int newlines;
    2.21      int Dutchcount,Frenchcount;
    2.22  };
    2.23  
    2.24  struct warnings {
    2.25      int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen;
    2.26 -    int endquote;
    2.27 +    int endquote,newlines;
    2.28      gboolean isDutch,isFrench;
    2.29  };
    2.30  
     3.1 --- a/doc/loupe-test.txt	Tue Oct 15 09:16:04 2013 +0100
     3.2 +++ b/doc/loupe-test.txt	Sat Oct 26 18:47:33 2013 +0100
     3.3 @@ -91,14 +91,35 @@
     3.4  ------------------
     3.5  
     3.6  One of the tests that bookloupe/gutcheck need to do is check that all
     3.7 -lines are ended with CR NL (as required by PG) rather than the UNIX
     3.8 -standard NL. loupe-test deliberately ignores the line endings in testcase
     3.9 -definition files and always uses CR NL. Thus there is needed a means
    3.10 +lines are ended with CR LF (as required by PG) rather than the UNIX
    3.11 +standard LF. loupe-test deliberately ignores the line endings in testcase
    3.12 +definition files and uses the expected CR LF. Thus there is needed a means
    3.13  to embed a linefeed (aka newline) character into the input to be sent
    3.14  to bookloupe/gutcheck to test that it correctly identified the problem.
    3.15  loupe-test recognises the unicode symbol for linefeed (U+240A): ␊ which
    3.16  can be used for this purpose instead of a normal newline.
    3.17  
    3.18 +UNIX-style newlines
    3.19 +-------------------
    3.20 +
    3.21 +To make life easier for users on UNIX and similar platforms, bookloupe
    3.22 +recognises the case of all lines terminated with UNIX-style newlines.
    3.23 +It notes this in the summary but does not issue any warnings. We thus
    3.24 +need some way to test this case which we do by the NEWLINES tag:
    3.25 +
    3.26 +  ┌──────────────────────────────────────────────────────────────────────────┐
    3.27 +  │**************** NEWLINES ****************                                │
    3.28 +  │LF                                                                        │
    3.29 +  │**************** INPUT ****************                                   │
    3.30 +  │Katherine was assailed by a sudden doubt. Had she mailed that letter? Yes,│
    3.31 +  │she was certain of that. She had run out to the mail box at ten o'clock   │
    3.32 +  │at night especially to mail it. What had gone wrong? Why wasn't there     │
    3.33 +  │someone to meet her?                                                      │
    3.34 +  └──────────────────────────────────────────────────────────────────────────┘
    3.35 +
    3.36 +The possible options are CRLF for DOS-style newlines (the default),
    3.37 +LF for UNIX-style newlines and CR for OS9-style newlines.
    3.38 +
    3.39  Passing command line options
    3.40  ----------------------------
    3.41  
    3.42 @@ -203,3 +224,16 @@
    3.43  this, eg.:
    3.44  
    3.45  sample: PASS (with 1 of 1 false positives and 1 of 1 false negatives)
    3.46 +
    3.47 +The summary
    3.48 +-----------
    3.49 +
    3.50 +As part of the header (the first section of output), bookloupe may display
    3.51 +a number of summary lines. These are characterized by a leading ASCII
    3.52 +long arrow (-->) and generally say something about the ebook as a whole
    3.53 +rather than individual lines. Where it is desired to test for the presence
    3.54 +of a summary line, a "summary" node can be included within the "expected"
    3.55 +node of a testcase using structured warnings. The "summary" node can contain
    3.56 +one or more "text" nodes which indicate the text of lines that must be
    3.57 +present in the summary section in order for the test to pass. No account is
    3.58 +taken of the order of such lines and other summary lines may also be present.
     4.1 --- a/test/bookloupe/Makefile.am	Tue Oct 15 09:16:04 2013 +0100
     4.2 +++ b/test/bookloupe/Makefile.am	Sat Oct 26 18:47:33 2013 +0100
     4.3 @@ -1,6 +1,6 @@
     4.4  TESTS_ENVIRONMENT=BOOKLOUPE=../../bookloupe/bookloupe ../harness/loupe-test
     4.5  TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst curved-quotes.tst \
     4.6  	runfox-quotes.tst curved-genitives.tst multi-line-illustration.tst \
     4.7 -	emdash.tst footnote-marker.tst
     4.8 +	emdash.tst footnote-marker.tst unix-lineends.tst os9-lineends.tst
     4.9  
    4.10  dist_pkgdata_DATA=$(TESTS)
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/test/bookloupe/os9-lineends.tst	Sat Oct 26 18:47:33 2013 +0100
     5.3 @@ -0,0 +1,13 @@
     5.4 +**************** NEWLINES ****************
     5.5 +CR
     5.6 +**************** INPUT ****************
     5.7 +Katherine was assailed by a sudden doubt. Had she mailed that letter? Yes,
     5.8 +she was certain of that. She had run out to the mail box at ten o'clock
     5.9 +at night especially to mail it. What had gone wrong? Why wasn't there
    5.10 +someone to meet her?
    5.11 +**************** WARNINGS ****************
    5.12 +<expected>
    5.13 +  <summary>
    5.14 +    <text>No lines in this file have a LF. Not reporting them. Project Gutenberg requires that all lineends be CR-LF.</text>
    5.15 +  </summary>
    5.16 +</expected>
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/test/bookloupe/unix-lineends.tst	Sat Oct 26 18:47:33 2013 +0100
     6.3 @@ -0,0 +1,17 @@
     6.4 +**************** NEWLINES ****************
     6.5 +LF
     6.6 +**************** INPUT ****************
     6.7 +Katherine was assailed by a sudden doubt. Had she mailed that letter? Yes,
     6.8 +she was certain of that. She had run out to the mail box at ten o'clock
     6.9 +at night especially to mail it. What had gone wrong?␍Why wasn't there
    6.10 +someone to meet her?
    6.11 +**************** WARNINGS ****************
    6.12 +<expected>
    6.13 +  <summary>
    6.14 +    <text>No lines in this file have a CR. Not reporting them. Project Gutenberg requires that all lineends be CR-LF.</text>
    6.15 +  </summary>
    6.16 +  <error>
    6.17 +    <at line="3" column="53"/>
    6.18 +    <text>Embedded CR?</text>
    6.19 +  </error>
    6.20 +</expected>
     7.1 --- a/test/harness/testcase.c	Tue Oct 15 09:16:04 2013 +0100
     7.2 +++ b/test/harness/testcase.c	Sat Oct 26 18:47:33 2013 +0100
     7.3 @@ -326,6 +326,42 @@
     7.4  }
     7.5  
     7.6  /*
     7.7 + * Check the summary produced by bookloupe against testcase->summary.
     7.8 + */
     7.9 +static gboolean testcase_check_summary(Testcase *testcase,const char *summary)
    7.10 +{
    7.11 +    int i;
    7.12 +    gboolean r;
    7.13 +    gchar **lines;
    7.14 +    GSList *texts,*lnk;
    7.15 +    if (!testcase->summary.texts)
    7.16 +	return TRUE;
    7.17 +    texts=g_slist_copy(testcase->summary.texts);
    7.18 +    lines=g_strsplit(summary,"\n",0);
    7.19 +    for(i=0;lines[i];i++)
    7.20 +    {
    7.21 +	if (!g_str_has_prefix(lines[i],"   --> "))
    7.22 +	    continue;
    7.23 +	for(lnk=texts;lnk;lnk=lnk->next)
    7.24 +	    if (!strcmp(lines[i]+7,lnk->data))
    7.25 +	    {
    7.26 +		texts=g_slist_delete_link(texts,lnk);
    7.27 +		break;
    7.28 +	    }
    7.29 +    }
    7.30 +    g_strfreev(lines);
    7.31 +    r=!texts;
    7.32 +    if (texts)
    7.33 +    {
    7.34 +	g_print("%s: FAIL\n",testcase->basename);
    7.35 +	g_print("Missing summary text from bookloupe:\n");
    7.36 +	g_print("   --> %s\n",texts->data);
    7.37 +    }
    7.38 +    g_slist_free(texts);
    7.39 +    return r;
    7.40 +}
    7.41 +
    7.42 +/*
    7.43   * Check the warnings produced by bookloupe against either the
    7.44   * unstructured testcase->expected or the structured testcase->warnings
    7.45   * as appropriate.
    7.46 @@ -460,7 +496,7 @@
    7.47      gboolean r;
    7.48      size_t pos,offset;
    7.49      GString *header;
    7.50 -    char *output,*filename,*s,*xfail=NULL;
    7.51 +    char *output,*filename,*s,*summary,*xfail=NULL;
    7.52      GError *error=NULL;
    7.53      if (!testcase_create_input_files(testcase,&error))
    7.54      {
    7.55 @@ -500,10 +536,15 @@
    7.56      pos=header->len;
    7.57      if (r)
    7.58      {
    7.59 -	/* Skip the summary */
    7.60 +	/* Find the end of the summary */
    7.61  	s=strstr(output+pos,"\n\n");
    7.62  	if (s)
    7.63 +	{
    7.64 +	    summary=g_strndup(output+pos,s-(output+pos));
    7.65 +	    r=testcase_check_summary(testcase,summary);
    7.66 +	    g_free(summary);
    7.67  	    pos=s-output+2;
    7.68 +	}
    7.69  	else
    7.70  	{
    7.71  	    g_print("%s: FAIL\n",testcase->basename);
    7.72 @@ -512,7 +553,8 @@
    7.73  	}
    7.74      }
    7.75      g_string_free(header,TRUE);
    7.76 -    r=testcase_check_warnings(testcase,output+pos,&xfail);
    7.77 +    if (r)
    7.78 +	r=testcase_check_warnings(testcase,output+pos,&xfail);
    7.79      g_free(filename);
    7.80      g_free(output);
    7.81      if (r)
     8.1 --- a/test/harness/testcase.h	Tue Oct 15 09:16:04 2013 +0100
     8.2 +++ b/test/harness/testcase.h	Sat Oct 26 18:47:33 2013 +0100
     8.3 @@ -15,6 +15,10 @@
     8.4  } TestcaseLocation;
     8.5  
     8.6  typedef struct {
     8.7 +    GSList *texts;
     8.8 +} TestcaseSummary;
     8.9 +
    8.10 +typedef struct {
    8.11      /*
    8.12       * Does this warning relate to a real problem in the etext
    8.13       * (eg., error and false-negative).
    8.14 @@ -38,12 +42,15 @@
    8.15      char *tmpdir;
    8.16      GSList *inputs;
    8.17      char *expected;
    8.18 +    TestcaseSummary summary;
    8.19      GSList *warnings;
    8.20      char *encoding;	/* The character encoding to talk to BOOKLOUPE in */
    8.21      char **options;
    8.22      enum {
    8.23  	TESTCASE_XFAIL=1<<0,
    8.24  	TESTCASE_TMP_DIR=1<<1,
    8.25 +	TESTCASE_UNIX_NEWLINES=1<<2,
    8.26 +	TESTCASE_OS9_NEWLINES=1<<3,
    8.27      } flags;
    8.28  } Testcase;
    8.29  
     9.1 --- a/test/harness/testcaseinput.c	Tue Oct 15 09:16:04 2013 +0100
     9.2 +++ b/test/harness/testcaseinput.c	Sat Oct 26 18:47:33 2013 +0100
     9.3 @@ -32,10 +32,10 @@
     9.4  }
     9.5  
     9.6  /*
     9.7 - * Replace \n with \r\n, U+240A (visible symbol for LF) with \n
     9.8 - * and U+240D (visible symbol for CR) with \r.
     9.9 + * Replace \n with requested newline, U+240A (visible symbol for LF)
    9.10 + * with \n and U+240D (visible symbol for CR) with \r.
    9.11   */
    9.12 -static char *unix2dos(const char *text)
    9.13 +static char *convert_newlines(const char *text,int flags)
    9.14  {
    9.15      gunichar c;
    9.16      const gunichar visible_lf=0x240A;
    9.17 @@ -46,8 +46,13 @@
    9.18      {
    9.19  	c=g_utf8_get_char(text);
    9.20  	text=g_utf8_next_char(text);
    9.21 -	if (c=='\n')
    9.22 -	    g_string_append(string,"\r\n");
    9.23 +	if (c=='\n' && !(flags&TESTCASE_UNIX_NEWLINES))
    9.24 +	{
    9.25 +	    if (flags&TESTCASE_OS9_NEWLINES)
    9.26 +		g_string_append_c(string,'\r');
    9.27 +	    else
    9.28 +		g_string_append(string,"\r\n");
    9.29 +	}
    9.30  	else if (c==visible_lf)
    9.31  	    g_string_append_c(string,'\n');
    9.32  	else if (c==visible_cr)
    9.33 @@ -76,7 +81,7 @@
    9.34      {
    9.35  	if (testcase->encoding)
    9.36  	{
    9.37 -	    t=unix2dos(input->contents);
    9.38 +	    t=convert_newlines(input->contents,testcase->flags);
    9.39  	    s=g_convert(t,-1,testcase->encoding,"UTF-8",NULL,&n,&tmp_err);
    9.40  	    g_free(t);
    9.41  	    if (!s)
    9.42 @@ -88,7 +93,7 @@
    9.43  	}
    9.44  	else
    9.45  	{
    9.46 -	    s=unix2dos(input->contents);
    9.47 +	    s=convert_newlines(input->contents,testcase->flags);
    9.48  	    n=strlen(s);
    9.49  	}
    9.50      }
    10.1 --- a/test/harness/testcaseio.c	Tue Oct 15 09:16:04 2013 +0100
    10.2 +++ b/test/harness/testcaseio.c	Sat Oct 26 18:47:33 2013 +0100
    10.3 @@ -22,7 +22,7 @@
    10.4      GError *err=NULL;
    10.5      char *s,*arg;
    10.6      const char *tag,*text;
    10.7 -    gboolean found_tag=FALSE;
    10.8 +    gboolean found_tag=FALSE,newlines_set=FALSE;
    10.9      parser=testcase_parser_new_from_file(filename);
   10.10      if (!parser)
   10.11  	return NULL;
   10.12 @@ -88,6 +88,26 @@
   10.13  	}
   10.14  	else if (!testcase->encoding && !strcmp(tag,"ENCODING"))
   10.15  	    testcase->encoding=g_strchomp(g_strdup(text));
   10.16 +	else if (!newlines_set && !strcmp(tag,"NEWLINES"))
   10.17 +	{
   10.18 +	    newlines_set=TRUE;
   10.19 +	    s=g_strdup(text);
   10.20 +	    g_strchomp(s);
   10.21 +	    if (!strcmp(s,"LF"))
   10.22 +		testcase->flags|=TESTCASE_UNIX_NEWLINES;
   10.23 +	    else if (!strcmp(s,"CR"))
   10.24 +		testcase->flags|=TESTCASE_OS9_NEWLINES;
   10.25 +	    else if (strcmp(s,"CRLF"))
   10.26 +	    {
   10.27 +		g_printerr(
   10.28 +		  "%s: Unrecognised style for newlines. Try CR or LF.\n",s);
   10.29 +		g_free(s);
   10.30 +		testcase_free(testcase);
   10.31 +		testcase_parser_free(parser);
   10.32 +		return NULL;
   10.33 +	    }
   10.34 +	    g_free(s);
   10.35 +	}
   10.36  	else if (!testcase->encoding && !strcmp(tag,"OPTIONS"))
   10.37  	{
   10.38  	    testcase->options=g_strsplit(text,"\n",0);
    11.1 --- a/test/harness/warningsparser.c	Tue Oct 15 09:16:04 2013 +0100
    11.2 +++ b/test/harness/warningsparser.c	Sat Oct 26 18:47:33 2013 +0100
    11.3 @@ -15,11 +15,12 @@
    11.4      enum {
    11.5  	WARNINGS_INIT,
    11.6  	WARNINGS_IN_EXPECTED,
    11.7 +	WARNINGS_IN_SUMMARY,
    11.8  	WARNINGS_IN_WARNING,
    11.9  	WARNINGS_IN_AT,
   11.10  	WARNINGS_IN_TEXT,
   11.11  	WARNINGS_DONE,
   11.12 -    } state;
   11.13 +    } state,parent_state;
   11.14  } WarningsBaton;
   11.15  
   11.16  static void warnings_parser_start_element(GMarkupParseContext *context,
   11.17 @@ -30,6 +31,7 @@
   11.18      guint64 tmp;
   11.19      char *endp;
   11.20      WarningsBaton *baton=user_data;
   11.21 +    baton->parent_state=baton->state;
   11.22      switch(baton->state)
   11.23      {
   11.24  	case WARNINGS_INIT:
   11.25 @@ -45,20 +47,36 @@
   11.26  		baton->state=WARNINGS_IN_EXPECTED;
   11.27  	    break;
   11.28  	case WARNINGS_IN_EXPECTED:
   11.29 -	    baton->warning=g_new0(TestcaseWarning,1);
   11.30 -	    if (!strcmp(element_name,"error"))
   11.31 -		baton->warning->is_real=TRUE;
   11.32 -	    else if (!strcmp(element_name,"false-positive"))
   11.33 -		baton->warning->xfail=TRUE;
   11.34 -	    else if (!strcmp(element_name,"false-negative"))
   11.35 -		baton->warning->is_real=baton->warning->xfail=TRUE;
   11.36 +	    if (!strcmp(element_name,"summary"))
   11.37 +	    {
   11.38 +		if (baton->testcase->summary.texts)
   11.39 +		{
   11.40 +		    g_set_error(error,G_MARKUP_ERROR,
   11.41 +		      G_MARKUP_ERROR_INVALID_CONTENT,"Multiple summary "
   11.42 +		      "elements are not valid");
   11.43 +		}
   11.44 +		else
   11.45 +		    baton->state=WARNINGS_IN_SUMMARY;
   11.46 +	    }
   11.47  	    else
   11.48  	    {
   11.49 -		g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_UNKNOWN_ELEMENT,
   11.50 -		  "Unknown element in 'expected': '%s'",element_name);
   11.51 -		g_free(baton->warning);
   11.52 -		baton->warning=NULL;
   11.53 -		return;
   11.54 +		baton->warning=g_new0(TestcaseWarning,1);
   11.55 +		if (!strcmp(element_name,"error"))
   11.56 +		    baton->warning->is_real=TRUE;
   11.57 +		else if (!strcmp(element_name,"false-positive"))
   11.58 +		    baton->warning->xfail=TRUE;
   11.59 +		else if (!strcmp(element_name,"false-negative"))
   11.60 +		    baton->warning->is_real=baton->warning->xfail=TRUE;
   11.61 +		else
   11.62 +		{
   11.63 +		    g_set_error(error,G_MARKUP_ERROR,
   11.64 +		      G_MARKUP_ERROR_UNKNOWN_ELEMENT,
   11.65 +		      "Unknown element in 'expected': '%s'",element_name);
   11.66 +		    g_free(baton->warning);
   11.67 +		    baton->warning=NULL;
   11.68 +		    return;
   11.69 +		}
   11.70 +		baton->state=WARNINGS_IN_WARNING;
   11.71  	    }
   11.72  	    if (attribute_names[0])
   11.73  	    {
   11.74 @@ -66,12 +84,28 @@
   11.75  		  G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
   11.76  		  "Unknown attribute on element '%s': '%s'",element_name,
   11.77  		  attribute_names[0]);
   11.78 -		g_free(baton->warning);
   11.79 -		baton->warning=NULL;
   11.80 +		if (baton->state==WARNINGS_IN_WARNING)
   11.81 +		{
   11.82 +		    g_free(baton->warning);
   11.83 +		    baton->warning=NULL;
   11.84 +		}
   11.85 +		baton->state=WARNINGS_IN_EXPECTED;
   11.86  		return;
   11.87  	    }
   11.88 -	    else
   11.89 -		baton->state=WARNINGS_IN_WARNING;
   11.90 +	    break;
   11.91 +	case WARNINGS_IN_SUMMARY:
   11.92 +	    if (!strcmp(element_name,"text"))
   11.93 +	    {
   11.94 +		if (attribute_names[0])
   11.95 +		{
   11.96 +		    g_set_error(error,G_MARKUP_ERROR,
   11.97 +		      G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
   11.98 +		      "Unknown attribute on element 'text': '%s'",
   11.99 +		      attribute_names[0]);
  11.100 +		    return;
  11.101 +		}
  11.102 +		baton->state=WARNINGS_IN_TEXT;
  11.103 +	    }
  11.104  	    break;
  11.105  	case WARNINGS_IN_WARNING:
  11.106  	    if (!strcmp(element_name,"at"))
  11.107 @@ -162,6 +196,15 @@
  11.108  	      g_slist_reverse(baton->testcase->warnings);
  11.109  	    baton->state=WARNINGS_DONE;
  11.110  	    break;
  11.111 +	case WARNINGS_IN_SUMMARY:
  11.112 +	    if (!baton->testcase->summary.texts)
  11.113 +		g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_INVALID_CONTENT,
  11.114 +		  "Summary element must contain at least one text element");
  11.115 +	    else
  11.116 +		baton->testcase->summary.texts=
  11.117 +		  g_slist_reverse(baton->testcase->summary.texts);
  11.118 +	    baton->state=WARNINGS_IN_EXPECTED;
  11.119 +	    break;
  11.120  	case WARNINGS_IN_WARNING:
  11.121  	    baton->warning->locations=
  11.122  	      g_slist_reverse(baton->warning->locations);
  11.123 @@ -177,7 +220,7 @@
  11.124  	    baton->state=WARNINGS_IN_WARNING;
  11.125  	    break;
  11.126  	case WARNINGS_IN_TEXT:
  11.127 -	    baton->state=WARNINGS_IN_WARNING;
  11.128 +	    baton->state=baton->parent_state;
  11.129  	    break;
  11.130  	default:
  11.131  	    g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_UNKNOWN_ELEMENT,
  11.132 @@ -198,6 +241,11 @@
  11.133  		g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_INVALID_CONTENT,
  11.134  		  "The 'expected' tag does not take any content");
  11.135  	    break;
  11.136 +	case WARNINGS_IN_SUMMARY:
  11.137 +	    if (strspn(text," \t\n")!=text_len)
  11.138 +		g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_INVALID_CONTENT,
  11.139 +		  "The summary tags do not take any content");
  11.140 +	    break;
  11.141  	case WARNINGS_IN_WARNING:
  11.142  	    if (strspn(text," \t\n")!=text_len)
  11.143  		g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_INVALID_CONTENT,
  11.144 @@ -211,7 +259,10 @@
  11.145  	case WARNINGS_IN_TEXT:
  11.146  	    s=g_strdup(text+strspn(text," \t\n"));
  11.147  	    g_strchomp(s);
  11.148 -	    if (baton->warning->text)
  11.149 +	    if (baton->parent_state==WARNINGS_IN_SUMMARY)
  11.150 +		baton->testcase->summary.texts=
  11.151 +		  g_slist_prepend(baton->testcase->summary.texts,s);
  11.152 +	    else if (baton->warning->text)
  11.153  	    {
  11.154  		t=g_strconcat(baton->warning->text,s,NULL);
  11.155  		g_free(baton->warning->text);
  11.156 @@ -237,6 +288,7 @@
  11.157      parser.text=warnings_parser_text;
  11.158      baton=g_new0(WarningsBaton,1);
  11.159      baton->testcase=testcase;
  11.160 +    baton->parent_state=WARNINGS_INIT;
  11.161      baton->state=WARNINGS_INIT;
  11.162      return g_markup_parse_context_new(&parser,
  11.163        G_MARKUP_TREAT_CDATA_AS_TEXT|G_MARKUP_PREFIX_ERROR_POSITION,