# HG changeset patch
# User ali <ali@juiblex.co.uk>
# Date 1382809653 -3600
# Node ID f44c530f80da57947086d347e820a285e62bce92
# Parent  ad92d11d59b812c1599d942452371f734742993f
Fix bug #24: Accept alternate form of newline

diff -r ad92d11d59b8 -r f44c530f80da bookloupe/bookloupe.c
--- a/bookloupe/bookloupe.c	Tue Oct 15 09:16:04 2013 +0100
+++ b/bookloupe/bookloupe.c	Sat Oct 26 18:47:33 2013 +0100
@@ -183,7 +183,7 @@
 
 gboolean mixdigit(const char *);
 gchar *getaword(const char **);
-char *flgets(char **,long);
+char *flgets(char **,long,int);
 void postprocess_for_HTML(char *);
 char *linehasmarkup(char *);
 char *losemarkup(char *);
@@ -487,11 +487,40 @@
     gchar *inword;
     QuoteClass qc;
     lines=g_strsplit(etext,"\n",0);
+    if (!lines[0])
+    {
+	/* An empty etext has no terminators */
+	results.newlines=DOS_NEWLINES;
+    }
+    else if (!lines[1])
+    {
+	/*
+	 * If there are no LFs, we don't have UNIX-style
+	 * terminators, but we might have OS9-style ones.
+	 */
+	results.newlines=OS9_NEWLINES;
+	g_strfreev(lines);
+	lines=g_strsplit(etext,"\r",0);
+	if (!lines[0] || !lines[1])
+	    /* Looks like we don't have any terminators at all */
+	    results.newlines=DOS_NEWLINES;
+    }
+    else
+    {
+	/* We might have UNIX-style terminators */
+	results.newlines=UNIX_NEWLINES;
+    }
     for (j=0;lines[j];j++)
     {
 	lbytes=strlen(lines[j]);
-	while (lbytes>0 && lines[j][lbytes-1]=='\r')
-	    lines[j][--lbytes]='\0';
+	if (lbytes>0 && lines[j][lbytes-1]=='\r')
+	{
+	    results.newlines=DOS_NEWLINES;
+	    do
+	    {
+		lines[j][--lbytes]='\0';
+	    } while (lbytes>0 && lines[j][lbytes-1]=='\r');
+	}
 	llen=g_utf8_strlen(lines[j],lbytes);
 	linecnt++;
 	if (strstr(lines[j],"*END") && strstr(lines[j],"SMALL PRINT") &&
@@ -633,6 +662,13 @@
 struct warnings *report_first_pass(struct first_pass_results *results)
 {
     static struct warnings warnings={0};
+    warnings.newlines=results->newlines;
+    if (warnings.newlines==UNIX_NEWLINES)
+	g_print("   --> No lines in this file have a CR. Not reporting them. "
+	  "Project Gutenberg requires that all lineends be CR-LF.\n");
+    else if (warnings.newlines==OS9_NEWLINES)
+	g_print("   --> No lines in this file have a LF. Not reporting them. "
+	  "Project Gutenberg requires that all lineends be CR-LF.\n");
     if (cnt_spacend>0)
 	g_print("   --> %ld lines in this file have white space at end\n",
 	  cnt_spacend);
@@ -2621,7 +2657,7 @@
      */
     linecnt=0;
     etext_ptr=etext;
-    while ((aline=flgets(&etext_ptr,linecnt+1)))
+    while ((aline=flgets(&etext_ptr,linecnt+1,warnings->newlines)))
     {
 	linecnt++;
 	if (linecnt==1)
@@ -2762,12 +2798,21 @@
 /*
  * flgets:
  *
- * Get one line from the input text, checking for
- * the existence of exactly one CR/LF line-end per line.
+ * Get one line from the input text. The setting of newlines has the following
+ * effect:
+ *
+ * DOS_NEWLINES: Check for the existence of exactly one CR-LF line-end per line.
+ *
+ * OS9_NEWLINES: Asserts that etext contains no LFs. CR is used as
+ *		 the newline character.
+ *
+ * UNIX_NEWLINES: Check for the presence of CRs.
+ *
+ * In all cases, check that the last line is correctly terminated.
  *
  * Returns: a pointer to the line.
  */
-char *flgets(char **etext,long lcnt)
+char *flgets(char **etext,long lcnt,int newlines)
 {
     gunichar c;
     gboolean isCR=FALSE;
@@ -2790,8 +2835,15 @@
 		    g_free(s);
 		}
 		if (!pswit[OVERVIEW_SWITCH])
-		    /* There may, or may not, have been a CR */
-		    g_print("    Line %ld - No LF?\n",lcnt);
+		{
+		    if (newlines==OS9_NEWLINES)
+			g_print("    Line %ld - No CR?\n",lcnt);
+		    else
+		    {
+			/* There may, or may not, have been a CR */
+			g_print("    Line %ld - No LF?\n",lcnt);
+		    }
+		}
 		else
 		    cnt_lineend++;
 	    }
@@ -2801,9 +2853,7 @@
 	/* either way, it's end of line */
 	if (c=='\n')
 	{
-	    if (isCR)
-		break;
-	    else
+	    if (newlines==DOS_NEWLINES && !isCR)
 	    {
 		/* Error - a LF without a preceding CR */
 		if (pswit[LINE_END_SWITCH])
@@ -2819,14 +2869,15 @@
 		    else
 			cnt_lineend++;
 		}
-		break;
 	    }
+	    break;
 	}
 	if (c=='\r')
 	{
-	    if (isCR)
+	    if (newlines==OS9_NEWLINES)
+		break;
+	    if (isCR || newlines==UNIX_NEWLINES)
 	    {
-		/* Error - two successive CRs */
 		if (pswit[LINE_END_SWITCH])
 		{
 		    if (pswit[ECHO_SWITCH])
@@ -2836,12 +2887,22 @@
 			g_free(s);
 		    }
 		    if (!pswit[OVERVIEW_SWITCH])
-			g_print("    Line %ld - Two successive CRs?\n",lcnt);
+		    {
+			if (newlines==UNIX_NEWLINES)
+			    g_print("    Line %ld column %ld - Embedded CR?\n",
+			      lcnt,g_utf8_pointer_to_offset(theline,eos)+1);
+			else
+			    g_print("    Line %ld - Two successive CRs?\n",
+			      lcnt);
+		    }
 		    else
 			cnt_lineend++;
 		}
+		if (newlines==UNIX_NEWLINES)
+		    *eos=' ';
 	    }
-	    isCR=TRUE;
+	    if (newlines==DOS_NEWLINES)
+		isCR=TRUE;
 	}
 	else
 	{
diff -r ad92d11d59b8 -r f44c530f80da bookloupe/bookloupe.h
--- a/bookloupe/bookloupe.h	Tue Oct 15 09:16:04 2013 +0100
+++ b/bookloupe/bookloupe.h	Sat Oct 26 18:47:33 2013 +0100
@@ -58,6 +58,12 @@
     SWITNO
 };
 
+enum {	/* Newline style detected in the etext by the first pass */
+    DOS_NEWLINES,	/* CR-LF; also used when no terminators are found */
+    UNIX_NEWLINES,	/* LF only, no line ends in a CR */
+    OS9_NEWLINES,	/* CR only, the etext contains no LFs */
+};
+
 struct dash_results {
     long base,space,non_PG_space,PG_space;
 };
@@ -68,12 +74,13 @@
     long fslashline,hyphens,longline,verylongline,htmcount,standalone_digit;
     long spacedash;
     struct dash_results emdash;
+    int newlines;
     int Dutchcount,Frenchcount;
 };
 
 struct warnings {
     int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen;
-    int endquote;
+    int endquote,newlines;
     gboolean isDutch,isFrench;
 };
 
diff -r ad92d11d59b8 -r f44c530f80da doc/loupe-test.txt
--- a/doc/loupe-test.txt	Tue Oct 15 09:16:04 2013 +0100
+++ b/doc/loupe-test.txt	Sat Oct 26 18:47:33 2013 +0100
@@ -91,14 +91,35 @@
 ------------------
 
 One of the tests that bookloupe/gutcheck need to do is check that all
-lines are ended with CR NL (as required by PG) rather than the UNIX
-standard NL. loupe-test deliberately ignores the line endings in testcase
-definition files and always uses CR NL. Thus there is needed a means
+lines are ended with CR LF (as required by PG) rather than the UNIX
+standard LF. loupe-test deliberately ignores the line endings in testcase
+definition files and uses the expected CR LF. Thus there is needed a means
 to embed a linefeed (aka newline) character into the input to be sent
 to bookloupe/gutcheck to test that it correctly identified the problem.
 loupe-test recognises the unicode symbol for linefeed (U+240A): ␊ which
 can be used for this purpose instead of a normal newline.
 
+UNIX-style newlines
+-------------------
+
+To make life easier for users on UNIX and similar platforms, bookloupe
+recognises the case of all lines terminated with UNIX-style newlines.
+It notes this in the summary but does not issue any warnings. We thus
+need some way to test this case which we do by the NEWLINES tag:
+
+  ┌──────────────────────────────────────────────────────────────────────────┐
+  │**************** NEWLINES ****************                                │
+  │LF                                                                        │
+  │**************** INPUT ****************                                   │
+  │Katherine was assailed by a sudden doubt. Had she mailed that letter? Yes,│
+  │she was certain of that. She had run out to the mail box at ten o'clock   │
+  │at night especially to mail it. What had gone wrong? Why wasn't there     │
+  │someone to meet her?                                                      │
+  └──────────────────────────────────────────────────────────────────────────┘
+
+The possible options are CRLF for DOS-style newlines (the default), LF for
+UNIX-style newlines and CR for OS9-style newlines.
+
 Passing command line options
 ----------------------------
 
@@ -203,3 +224,16 @@
 this, eg.:
 
 sample: PASS (with 1 of 1 false positives and 1 of 1 false negatives)
+
+The summary
+-----------
+
+As part of the header (the first section of output), bookloupe may display
+a number of summary lines. These are characterized by a leading ASCII
+long arrow (-->) and generally say something about the ebook as a whole
+rather than individual lines. Where it is desired to test for the presence
+of a summary line, a "summary" node can be included within the "expected"
+node of a testcase using structured warnings. The "summary" node can contain
+one or more "text" nodes which indicate the text of lines that must be
+present in the summary section in order for the test to pass. No account is
+taken of the order of such lines and other summary lines may also be present.
diff -r ad92d11d59b8 -r f44c530f80da test/bookloupe/Makefile.am
--- a/test/bookloupe/Makefile.am	Tue Oct 15 09:16:04 2013 +0100
+++ b/test/bookloupe/Makefile.am	Sat Oct 26 18:47:33 2013 +0100
@@ -1,6 +1,6 @@
 TESTS_ENVIRONMENT=BOOKLOUPE=../../bookloupe/bookloupe ../harness/loupe-test
 TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst curved-quotes.tst \
 	runfox-quotes.tst curved-genitives.tst multi-line-illustration.tst \
-	emdash.tst footnote-marker.tst
+	emdash.tst footnote-marker.tst unix-lineends.tst os9-lineends.tst
 
 dist_pkgdata_DATA=$(TESTS)
diff -r ad92d11d59b8 -r f44c530f80da test/bookloupe/os9-lineends.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/bookloupe/os9-lineends.tst	Sat Oct 26 18:47:33 2013 +0100
@@ -0,0 +1,13 @@
+**************** NEWLINES ****************
+CR
+**************** INPUT ****************
+Katherine was assailed by a sudden doubt. Had she mailed that letter? Yes,
+she was certain of that. She had run out to the mail box at ten o'clock
+at night especially to mail it. What had gone wrong? Why wasn't there
+someone to meet her?
+**************** WARNINGS ****************
+<expected>
+  <summary>
+    <text>No lines in this file have a LF. Not reporting them. Project Gutenberg requires that all lineends be CR-LF.</text>
+  </summary>
+</expected>
diff -r ad92d11d59b8 -r f44c530f80da test/bookloupe/unix-lineends.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/bookloupe/unix-lineends.tst	Sat Oct 26 18:47:33 2013 +0100
@@ -0,0 +1,17 @@
+**************** NEWLINES ****************
+LF
+**************** INPUT ****************
+Katherine was assailed by a sudden doubt. Had she mailed that letter? Yes,
+she was certain of that. She had run out to the mail box at ten o'clock
+at night especially to mail it. What had gone wrong?␍Why wasn't there
+someone to meet her?
+**************** WARNINGS ****************
+<expected>
+  <summary>
+    <text>No lines in this file have a CR. Not reporting them. Project Gutenberg requires that all lineends be CR-LF.</text>
+  </summary>
+  <error>
+    <at line="3" column="53"/>
+    <text>Embedded CR?</text>
+  </error>
+</expected>
diff -r ad92d11d59b8 -r f44c530f80da test/harness/testcase.c
--- a/test/harness/testcase.c	Tue Oct 15 09:16:04 2013 +0100
+++ b/test/harness/testcase.c	Sat Oct 26 18:47:33 2013 +0100
@@ -326,6 +326,42 @@
 }
 
 /*
+ * Check the summary produced by bookloupe against testcase->summary.
+ * Every expected text must match a "   --> " line of the summary, in any
+ * order. Prints every text left unmatched and returns FALSE on failure.
+ */
+static gboolean testcase_check_summary(Testcase *testcase,const char *summary)
+{
+    int i;
+    gchar **lines;
+    GSList *texts,*lnk;
+    if (!testcase->summary.texts)
+	return TRUE;
+    texts=g_slist_copy(testcase->summary.texts);
+    lines=g_strsplit(summary,"\n",0);
+    for(i=0;lines[i];i++)
+    {
+	if (!g_str_has_prefix(lines[i],"   --> "))
+	    continue;
+	for(lnk=texts;lnk;lnk=lnk->next)
+	    if (!strcmp(lines[i]+7,lnk->data))	/* +7 skips the arrow prefix */
+	    {
+		texts=g_slist_delete_link(texts,lnk);
+		break;
+	    }
+    }
+    g_strfreev(lines);
+    if (!texts)
+	return TRUE;
+    g_print("%s: FAIL\n",testcase->basename);
+    g_print("Missing summary text from bookloupe:\n");
+    for(lnk=texts;lnk;lnk=lnk->next)
+	g_print("   --> %s\n",(const char *)lnk->data);
+    g_slist_free(texts);
+    return FALSE;
+}
+
+/*
  * Check the warnings produced by bookloupe against either the
  * unstructured testcase->expected or the structured testcase->warnings
  * as appropriate.
@@ -460,7 +496,7 @@
     gboolean r;
     size_t pos,offset;
     GString *header;
-    char *output,*filename,*s,*xfail=NULL;
+    char *output,*filename,*s,*summary,*xfail=NULL;
     GError *error=NULL;
     if (!testcase_create_input_files(testcase,&error))
     {
@@ -500,10 +536,15 @@
     pos=header->len;
     if (r)
     {
-	/* Skip the summary */
+	/* Find the end of the summary */
 	s=strstr(output+pos,"\n\n");
 	if (s)
+	{
+	    summary=g_strndup(output+pos,s-(output+pos));
+	    r=testcase_check_summary(testcase,summary);
+	    g_free(summary);
 	    pos=s-output+2;
+	}
 	else
 	{
 	    g_print("%s: FAIL\n",testcase->basename);
@@ -512,7 +553,8 @@
 	}
     }
     g_string_free(header,TRUE);
-    r=testcase_check_warnings(testcase,output+pos,&xfail);
+    if (r)
+	r=testcase_check_warnings(testcase,output+pos,&xfail);
     g_free(filename);
     g_free(output);
     if (r)
diff -r ad92d11d59b8 -r f44c530f80da test/harness/testcase.h
--- a/test/harness/testcase.h	Tue Oct 15 09:16:04 2013 +0100
+++ b/test/harness/testcase.h	Sat Oct 26 18:47:33 2013 +0100
@@ -15,6 +15,10 @@
 } TestcaseLocation;
 
 typedef struct {
+    GSList *texts;	/* Summary line texts (char *) expected from bookloupe */
+} TestcaseSummary;
+
+typedef struct {
     /*
      * Does this warning relate to a real problem in the etext
      * (eg., error and false-negative).
@@ -38,12 +42,15 @@
     char *tmpdir;
     GSList *inputs;
     char *expected;
+    TestcaseSummary summary;
     GSList *warnings;
     char *encoding;	/* The character encoding to talk to BOOKLOUPE in */
     char **options;
     enum {
 	TESTCASE_XFAIL=1<<0,
 	TESTCASE_TMP_DIR=1<<1,
+	TESTCASE_UNIX_NEWLINES=1<<2,
+	TESTCASE_OS9_NEWLINES=1<<3,
     } flags;
 } Testcase;
 
diff -r ad92d11d59b8 -r f44c530f80da test/harness/testcaseinput.c
--- a/test/harness/testcaseinput.c	Tue Oct 15 09:16:04 2013 +0100
+++ b/test/harness/testcaseinput.c	Sat Oct 26 18:47:33 2013 +0100
@@ -32,10 +32,10 @@
 }
 
 /*
- * Replace \n with \r\n, U+240A (visible symbol for LF) with \n
- * and U+240D (visible symbol for CR) with \r.
+ * Replace \n with requested newline, U+240A (visible symbol for LF)
+ * with \n and U+240D (visible symbol for CR) with \r.
  */
-static char *unix2dos(const char *text)
+static char *convert_newlines(const char *text,int flags)
 {
     gunichar c;
     const gunichar visible_lf=0x240A;
@@ -46,8 +46,13 @@
     {
 	c=g_utf8_get_char(text);
 	text=g_utf8_next_char(text);
-	if (c=='\n')
-	    g_string_append(string,"\r\n");
+	if (c=='\n' && !(flags&TESTCASE_UNIX_NEWLINES))
+	{
+	    if (flags&TESTCASE_OS9_NEWLINES)
+		g_string_append_c(string,'\r');
+	    else
+		g_string_append(string,"\r\n");
+	}
 	else if (c==visible_lf)
 	    g_string_append_c(string,'\n');
 	else if (c==visible_cr)
@@ -76,7 +81,7 @@
     {
 	if (testcase->encoding)
 	{
-	    t=unix2dos(input->contents);
+	    t=convert_newlines(input->contents,testcase->flags);
 	    s=g_convert(t,-1,testcase->encoding,"UTF-8",NULL,&n,&tmp_err);
 	    g_free(t);
 	    if (!s)
@@ -88,7 +93,7 @@
 	}
 	else
 	{
-	    s=unix2dos(input->contents);
+	    s=convert_newlines(input->contents,testcase->flags);
 	    n=strlen(s);
 	}
     }
diff -r ad92d11d59b8 -r f44c530f80da test/harness/testcaseio.c
--- a/test/harness/testcaseio.c	Tue Oct 15 09:16:04 2013 +0100
+++ b/test/harness/testcaseio.c	Sat Oct 26 18:47:33 2013 +0100
@@ -22,7 +22,7 @@
     GError *err=NULL;
     char *s,*arg;
     const char *tag,*text;
-    gboolean found_tag=FALSE;
+    gboolean found_tag=FALSE,newlines_set=FALSE;
     parser=testcase_parser_new_from_file(filename);
     if (!parser)
 	return NULL;
@@ -88,6 +88,26 @@
 	}
 	else if (!testcase->encoding && !strcmp(tag,"ENCODING"))
 	    testcase->encoding=g_strchomp(g_strdup(text));
+	else if (!newlines_set && !strcmp(tag,"NEWLINES"))
+	{
+	    newlines_set=TRUE;
+	    s=g_strdup(text);
+	    g_strchomp(s);
+	    if (!strcmp(s,"LF"))
+		testcase->flags|=TESTCASE_UNIX_NEWLINES;
+	    else if (!strcmp(s,"CR"))
+		testcase->flags|=TESTCASE_OS9_NEWLINES;
+	    else if (strcmp(s,"CRLF"))
+	    {
+		g_printerr(
+		  "%s: Unrecognised style for newlines. Try CR or LF.\n",s);
+		g_free(s);
+		testcase_free(testcase);
+		testcase_parser_free(parser);
+		return NULL;
+	    }
+	    g_free(s);
+	}
 	else if (!testcase->encoding && !strcmp(tag,"OPTIONS"))
 	{
 	    testcase->options=g_strsplit(text,"\n",0);
diff -r ad92d11d59b8 -r f44c530f80da test/harness/warningsparser.c
--- a/test/harness/warningsparser.c	Tue Oct 15 09:16:04 2013 +0100
+++ b/test/harness/warningsparser.c	Sat Oct 26 18:47:33 2013 +0100
@@ -15,11 +15,12 @@
     enum {
 	WARNINGS_INIT,
 	WARNINGS_IN_EXPECTED,
+	WARNINGS_IN_SUMMARY,
 	WARNINGS_IN_WARNING,
 	WARNINGS_IN_AT,
 	WARNINGS_IN_TEXT,
 	WARNINGS_DONE,
-    } state;
+    } state,parent_state;
 } WarningsBaton;
 
 static void warnings_parser_start_element(GMarkupParseContext *context,
@@ -30,6 +31,7 @@
     guint64 tmp;
     char *endp;
     WarningsBaton *baton=user_data;
+    baton->parent_state=baton->state;
     switch(baton->state)
     {
 	case WARNINGS_INIT:
@@ -45,20 +47,36 @@
 		baton->state=WARNINGS_IN_EXPECTED;
 	    break;
 	case WARNINGS_IN_EXPECTED:
-	    baton->warning=g_new0(TestcaseWarning,1);
-	    if (!strcmp(element_name,"error"))
-		baton->warning->is_real=TRUE;
-	    else if (!strcmp(element_name,"false-positive"))
-		baton->warning->xfail=TRUE;
-	    else if (!strcmp(element_name,"false-negative"))
-		baton->warning->is_real=baton->warning->xfail=TRUE;
+	    if (!strcmp(element_name,"summary"))
+	    {
+		if (baton->testcase->summary.texts)
+		{
+		    g_set_error(error,G_MARKUP_ERROR,
+		      G_MARKUP_ERROR_INVALID_CONTENT,"Multiple summary "
+		      "elements are not valid");
+		}
+		else
+		    baton->state=WARNINGS_IN_SUMMARY;
+	    }
 	    else
 	    {
-		g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_UNKNOWN_ELEMENT,
-		  "Unknown element in 'expected': '%s'",element_name);
-		g_free(baton->warning);
-		baton->warning=NULL;
-		return;
+		baton->warning=g_new0(TestcaseWarning,1);
+		if (!strcmp(element_name,"error"))
+		    baton->warning->is_real=TRUE;
+		else if (!strcmp(element_name,"false-positive"))
+		    baton->warning->xfail=TRUE;
+		else if (!strcmp(element_name,"false-negative"))
+		    baton->warning->is_real=baton->warning->xfail=TRUE;
+		else
+		{
+		    g_set_error(error,G_MARKUP_ERROR,
+		      G_MARKUP_ERROR_UNKNOWN_ELEMENT,
+		      "Unknown element in 'expected': '%s'",element_name);
+		    g_free(baton->warning);
+		    baton->warning=NULL;
+		    return;
+		}
+		baton->state=WARNINGS_IN_WARNING;
 	    }
 	    if (attribute_names[0])
 	    {
@@ -66,12 +84,28 @@
 		  G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
 		  "Unknown attribute on element '%s': '%s'",element_name,
 		  attribute_names[0]);
-		g_free(baton->warning);
-		baton->warning=NULL;
+		if (baton->state==WARNINGS_IN_WARNING)
+		{
+		    g_free(baton->warning);
+		    baton->warning=NULL;
+		}
+		baton->state=WARNINGS_IN_EXPECTED;
 		return;
 	    }
-	    else
-		baton->state=WARNINGS_IN_WARNING;
+	    break;
+	case WARNINGS_IN_SUMMARY:
+	    if (!strcmp(element_name,"text"))
+	    {
+		if (attribute_names[0])
+		{
+		    g_set_error(error,G_MARKUP_ERROR,
+		      G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
+		      "Unknown attribute on element 'text': '%s'",
+		      attribute_names[0]);
+		    return;
+		}
+		baton->state=WARNINGS_IN_TEXT;
+	    }
 	    break;
 	case WARNINGS_IN_WARNING:
 	    if (!strcmp(element_name,"at"))
@@ -162,6 +196,15 @@
 	      g_slist_reverse(baton->testcase->warnings);
 	    baton->state=WARNINGS_DONE;
 	    break;
+	case WARNINGS_IN_SUMMARY:
+	    if (!baton->testcase->summary.texts)
+		g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_INVALID_CONTENT,
+		  "Summary element must contain at least one text element");
+	    else
+		baton->testcase->summary.texts=
+		  g_slist_reverse(baton->testcase->summary.texts);
+	    baton->state=WARNINGS_IN_EXPECTED;
+	    break;
 	case WARNINGS_IN_WARNING:
 	    baton->warning->locations=
 	      g_slist_reverse(baton->warning->locations);
@@ -177,7 +220,7 @@
 	    baton->state=WARNINGS_IN_WARNING;
 	    break;
 	case WARNINGS_IN_TEXT:
-	    baton->state=WARNINGS_IN_WARNING;
+	    baton->state=baton->parent_state;
 	    break;
 	default:
 	    g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_UNKNOWN_ELEMENT,
@@ -198,6 +241,11 @@
 		g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_INVALID_CONTENT,
 		  "The 'expected' tag does not take any content");
 	    break;
+	case WARNINGS_IN_SUMMARY:
+	    if (strspn(text," \t\n")!=text_len)
+		g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_INVALID_CONTENT,
+		  "The summary tags do not take any content");
+	    break;
 	case WARNINGS_IN_WARNING:
 	    if (strspn(text," \t\n")!=text_len)
 		g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_INVALID_CONTENT,
@@ -211,7 +259,10 @@
 	case WARNINGS_IN_TEXT:
 	    s=g_strdup(text+strspn(text," \t\n"));
 	    g_strchomp(s);
-	    if (baton->warning->text)
+	    if (baton->parent_state==WARNINGS_IN_SUMMARY)
+		baton->testcase->summary.texts=
+		  g_slist_prepend(baton->testcase->summary.texts,s);
+	    else if (baton->warning->text)
 	    {
 		t=g_strconcat(baton->warning->text,s,NULL);
 		g_free(baton->warning->text);
@@ -237,6 +288,7 @@
     parser.text=warnings_parser_text;
     baton=g_new0(WarningsBaton,1);
     baton->testcase=testcase;
+    baton->parent_state=WARNINGS_INIT;
     baton->state=WARNINGS_INIT;
     return g_markup_parse_context_new(&parser,
       G_MARKUP_TREAT_CDATA_AS_TEXT|G_MARKUP_PREFIX_ERROR_POSITION,