1.1 --- a/bookloupe/bookloupe.c Sun Sep 29 22:51:27 2013 +0100
1.2 +++ b/bookloupe/bookloupe.c Mon Oct 21 23:36:40 2013 +0100
1.3 @@ -245,7 +245,7 @@
1.4
1.5 gboolean mixdigit(const char *);
1.6 gchar *getaword(const char **);
1.7 -char *flgets(char **,long);
1.8 +char *flgets(char **,long,gboolean);
1.9 void postprocess_for_HTML(char *);
1.10 char *linehasmarkup(char *);
1.11 char *losemarkup(char *);
1.12 @@ -735,11 +735,20 @@
1.13 gchar *inword;
1.14 QuoteClass qc;
1.15 lines=g_strsplit(etext,"\n",0);
1.16 + if (lines[0])
1.17 + /* If there's at least one line, we might have UNIX-style terminators */
1.18 + results.unix_lineends=TRUE;
1.19 for (j=0;lines[j];j++)
1.20 {
1.21 lbytes=strlen(lines[j]);
1.22 - while (lbytes>0 && lines[j][lbytes-1]=='\r')
1.23 - lines[j][--lbytes]='\0';
1.24 + if (lbytes>0 && lines[j][lbytes-1]=='\r')
1.25 + {
1.26 + results.unix_lineends=FALSE;
1.27 + do
1.28 + {
1.29 + lines[j][--lbytes]='\0';
1.30 + } while (lbytes>0 && lines[j][lbytes-1]=='\r');
1.31 + }
1.32 llen=g_utf8_strlen(lines[j],lbytes);
1.33 linecnt++;
1.34 if (strstr(lines[j],"*END") && strstr(lines[j],"SMALL PRINT") &&
1.35 @@ -881,6 +890,13 @@
1.36 struct warnings *report_first_pass(struct first_pass_results *results)
1.37 {
1.38 static struct warnings warnings={0};
1.39 + warnings.nocr=1;
1.40 + if (results->unix_lineends)
1.41 + {
1.42 + warnings.nocr=0;
1.43 + g_print(" --> No lines in this file have a CR. Not reporting them. "
1.44 + "Project Gutenberg requires that all lineends be CR-LF.\n");
1.45 + }
1.46 if (cnt_spacend>0)
1.47 g_print(" --> %ld lines in this file have white space at end\n",
1.48 cnt_spacend);
1.49 @@ -2869,7 +2885,7 @@
1.50 */
1.51 linecnt=0;
1.52 etext_ptr=etext;
1.53 - while ((aline=flgets(&etext_ptr,linecnt+1)))
1.54 + while ((aline=flgets(&etext_ptr,linecnt+1,warnings->nocr)))
1.55 {
1.56 linecnt++;
1.57 if (linecnt==1)
1.58 @@ -3015,7 +3031,7 @@
1.59 *
1.60 * Returns: a pointer to the line.
1.61 */
1.62 -char *flgets(char **etext,long lcnt)
1.63 +char *flgets(char **etext,long lcnt,gboolean warn_nocr)
1.64 {
1.65 gunichar c;
1.66 gboolean isCR=FALSE;
1.67 @@ -3054,7 +3070,7 @@
1.68 else
1.69 {
1.70 /* Error - a LF without a preceding CR */
1.71 - if (pswit[LINE_END_SWITCH])
1.72 + if (pswit[LINE_END_SWITCH] && warn_nocr)
1.73 {
1.74 if (pswit[ECHO_SWITCH])
1.75 {
2.1 --- a/bookloupe/bookloupe.h Sun Sep 29 22:51:27 2013 +0100
2.2 +++ b/bookloupe/bookloupe.h Mon Oct 21 23:36:40 2013 +0100
2.3 @@ -70,12 +70,13 @@
2.4 long spacedash;
2.5 struct dash_results emdash;
2.6 int Dutchcount,Frenchcount;
2.7 + gboolean unix_lineends;
2.8 };
2.9
2.10 struct warnings {
2.11 int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen;
2.12 int endquote;
2.13 - gboolean isDutch,isFrench;
2.14 + gboolean isDutch,isFrench,nocr; /* nocr: passed to flgets() as warn_nocr; when FALSE the "LF without a preceding CR" report is suppressed */
2.15 };
2.16
2.17 struct line_properties {
3.1 --- a/doc/loupe-test.txt Sun Sep 29 22:51:27 2013 +0100
3.2 +++ b/doc/loupe-test.txt Mon Oct 21 23:36:40 2013 +0100
3.3 @@ -91,14 +91,35 @@
3.4 ------------------
3.5
3.6 One of the tests that bookloupe/gutcheck need to do is check that all
3.7 -lines are ended with CR NL (as required by PG) rather than the UNIX
3.8 -standard NL. loupe-test deliberately ignores the line endings in testcase
3.9 -definition files and always uses CR NL. Thus there is needed a means
3.10 +lines are ended with CR LF (as required by PG) rather than the UNIX
3.11 +standard LF. loupe-test deliberately ignores the line endings in testcase
3.12 +definition files and uses the expected CR LF. A means is therefore needed
3.13 to embed a linefeed (aka newline) character into the input to be sent
3.14 to bookloupe/gutcheck to test that it correctly identified the problem.
3.15 loupe-test recognises the unicode symbol for linefeed (U+240A): ␊ which
3.16 can be used for this purpose instead of a normal newline.
3.17
3.18 +UNIX-style newlines
3.19 +-------------------
3.20 +
3.21 +To make life easier for users on UNIX and similar platforms, bookloupe
3.22 +recognises the case of all lines terminated with UNIX-style newlines.
3.23 +It notes this in the summary but does not issue any warnings. To test this
3.24 +case, a testcase selects the line terminators of its input with the NEWLINES tag:
3.25 +
3.26 + ┌──────────────────────────────────────────────────────────────────────────┐
3.27 + │**************** NEWLINES **************** │
3.28 + │LF │
3.29 + │**************** INPUT **************** │
3.30 + │Katherine was assailed by a sudden doubt. Had she mailed that letter? Yes,│
3.31 + │she was certain of that. She had run out to the mail box at ten o'clock │
3.32 + │at night especially to mail it. What had gone wrong? Why wasn't there │
3.33 + │someone to meet her? │
3.34 + └──────────────────────────────────────────────────────────────────────────┘
3.35 +
3.36 +The possible options are CRLF for DOS-style newlines (the default) and
3.37 +LF for UNIX-style newlines.
3.38 +
3.39 Passing command line options
3.40 ----------------------------
3.41
3.42 @@ -203,3 +224,16 @@
3.43 this, eg.:
3.44
3.45 sample: PASS (with 1 of 1 false positives and 1 of 1 false negatives)
3.46 +
3.47 +The summary
3.48 +-----------
3.49 +
3.50 +As part of the header (the first section of output), bookloupe may display
3.51 +a number of summary lines. These are characterized by a leading ASCII
3.52 +long arrow (-->) and generally say something about the ebook as a whole
3.53 +rather than individual lines. Where it is desired to test for the presence
3.54 +of a summary line, a "summary" node can be included within the "expected"
3.55 +node of a testcase using structured warnings. The "summary" node can contain
3.56 +one or more "text" nodes which indicate the text of lines that must be
3.57 +present in the summary section in order for the test to pass. No account is
3.58 +taken of the order of such lines and other summary lines may also be present.
4.1 --- a/test/bookloupe/Makefile.am Sun Sep 29 22:51:27 2013 +0100
4.2 +++ b/test/bookloupe/Makefile.am Mon Oct 21 23:36:40 2013 +0100
4.3 @@ -2,6 +2,6 @@
4.4 TESTS=non-ascii.tst long-line.tst curved-single-quotes.tst curved-quotes.tst \
4.5 runfox-quotes.tst curved-genitives.tst multi-line-illustration.tst \
4.6 emdash.tst config-internal.tst config-default.tst config-user.tst \
4.7 - config-override.tst footnote-marker.tst
4.8 + config-override.tst footnote-marker.tst unix-lineends.tst
4.9
4.10 dist_pkgdata_DATA=$(TESTS)
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
5.2 +++ b/test/bookloupe/unix-lineends.tst Mon Oct 21 23:36:40 2013 +0100
5.3 @@ -0,0 +1,13 @@
5.4 +**************** NEWLINES ****************
5.5 +LF
5.6 +**************** INPUT ****************
5.7 +Katherine was assailed by a sudden doubt. Had she mailed that letter? Yes,
5.8 +she was certain of that. She had run out to the mail box at ten o'clock
5.9 +at night especially to mail it. What had gone wrong? Why wasn't there
5.10 +someone to meet her?
5.11 +**************** WARNINGS ****************
5.12 +<expected>
5.13 + <summary>
5.14 + <text>No lines in this file have a CR. Not reporting them. Project Gutenberg requires that all lineends be CR-LF.</text>
5.15 + </summary>
5.16 +</expected>
6.1 --- a/test/harness/testcase.c Sun Sep 29 22:51:27 2013 +0100
6.2 +++ b/test/harness/testcase.c Mon Oct 21 23:36:40 2013 +0100
6.3 @@ -384,6 +384,42 @@
6.4 return s-output+1;
6.5 }
6.6
6.7 + /*
6.8 + * Check that each text in testcase->summary.texts matches a " --> " line of summary (bookloupe's summary output); a line satisfies at most one text.
6.9 + * Returns TRUE if all were found or none were expected; otherwise prints a FAIL report listing every missing text and returns FALSE.
6.10 + */
6.11 +static gboolean testcase_check_summary(Testcase *testcase,const char *summary)
6.12 +{
6.13 +    static const char prefix[]=" --> "; /* the match offset is derived from this, never hard-coded */
6.14 +    int i;
6.15 +    gchar **lines;
6.16 +    GSList *texts,*lnk;
6.17 +    if (!testcase->summary.texts)
6.18 +	return TRUE;
6.19 +    texts=g_slist_copy(testcase->summary.texts); /* shallow copy: strings stay owned by testcase */
6.20 +    lines=g_strsplit(summary,"\n",0);
6.21 +    for(i=0;lines[i];i++)
6.22 +    {
6.23 +	if (!g_str_has_prefix(lines[i],prefix))
6.24 +	    continue;
6.25 +	for(lnk=texts;lnk;lnk=lnk->next)
6.26 +	    if (!strcmp(lines[i]+strlen(prefix),lnk->data))
6.27 +	    {
6.28 +		texts=g_slist_delete_link(texts,lnk);
6.29 +		break;
6.30 +	    }
6.31 +    }
6.32 +    g_strfreev(lines);
6.33 +    if (!texts)
6.34 +	return TRUE;
6.35 +    g_print("%s: FAIL\n",testcase->basename);
6.36 +    g_print("Missing summary text from bookloupe:\n");
6.37 +    for(lnk=texts;lnk;lnk=lnk->next)
6.38 +	g_print(" --> %s\n",(const char *)lnk->data);
6.39 +    g_slist_free(texts);
6.40 +    return FALSE;
6.41 +}
6.42 +
6.43 /*
6.44 * Check the warnings produced by bookloupe against either the
6.45 * unstructured testcase->expected or the structured testcase->warnings
6.46 @@ -519,7 +555,7 @@
6.47 gboolean r;
6.48 size_t pos,offset;
6.49 GString *header;
6.50 - char *filename,*s,*xfail=NULL;
6.51 + char *filename,*s,*hdr,*summary,*xfail=NULL;
6.52 GError *error=NULL;
6.53 if (!testcase_create_input_files(testcase,&error))
6.54 {
6.55 @@ -558,23 +594,30 @@
6.56 print_unexpected(testcase->test_output,offset);
6.57 r=FALSE;
6.58 }
6.59 + summary=testcase->test_output+header->len;
6.60 pos=header->len;
6.61 if (r)
6.62 {
6.63 - /* Skip the summary */
6.64 - s=strstr(testcase->test_output+pos,"\n\n");
6.65 + /* Find the end of the summary */
6.66 + s=strstr(summary,"\n\n");
6.67 if (s)
6.68 + {
6.69 + summary=g_strndup(summary,s-summary);
6.70 + r=testcase_check_summary(testcase,summary);
6.71 + g_free(summary);
6.72 pos=s-testcase->test_output+2;
6.73 + }
6.74 else
6.75 {
6.76 g_print("%s: FAIL\n",testcase->basename);
6.77 - g_print("Unterminated summary from bookloupe:\n%s\n",
6.78 - testcase->test_output+pos);
6.79 + g_print("Unterminated summary from bookloupe:\n%s\n",summary);
6.80 r=FALSE;
6.81 }
6.82 }
6.83 g_string_free(header,TRUE);
6.84 - r=testcase_check_warnings(testcase,testcase->test_output+pos,&xfail);
6.85 + if (r)
6.86 + r=testcase_check_warnings(testcase,testcase->test_output+pos,
6.87 + &xfail);
6.88 }
6.89 if (!testcase_verify_output_files(testcase))
6.90 r=FALSE;
7.1 --- a/test/harness/testcase.h Sun Sep 29 22:51:27 2013 +0100
7.2 +++ b/test/harness/testcase.h Mon Oct 21 23:36:40 2013 +0100
7.3 @@ -15,6 +15,10 @@
7.4 } TestcaseLocation;
7.5
7.6 typedef struct {
7.7 + GSList *texts; /* Expected summary lines as strings, filled from the "text" children of a "summary" node; compared against bookloupe's summary lines after the arrow prefix */
7.8 +} TestcaseSummary;
7.9 +
7.10 +typedef struct {
7.11 /*
7.12 * Does this warning relate to a real problem in the etext
7.13 * (eg., error and false-negative).
7.14 @@ -39,6 +43,7 @@
7.15 GSList *inputs;
7.16 GSList *outputs;
7.17 char *expected;
7.18 + TestcaseSummary summary;
7.19 GSList *warnings;
7.20 char *encoding; /* The character encoding to talk to BOOKLOUPE in */
7.21 char **options;
7.22 @@ -46,6 +51,7 @@
7.23 enum {
7.24 TESTCASE_XFAIL=1<<0,
7.25 TESTCASE_TMP_DIR=1<<1,
7.26 + TESTCASE_UNIX_NEWLINES=1<<2,
7.27 } flags;
7.28 } Testcase;
7.29
8.1 --- a/test/harness/testcaseinput.c Sun Sep 29 22:51:27 2013 +0100
8.2 +++ b/test/harness/testcaseinput.c Mon Oct 21 23:36:40 2013 +0100
8.3 @@ -76,9 +76,15 @@
8.4 {
8.5 if (testcase->encoding)
8.6 {
8.7 - t=unix2dos(input->contents);
8.8 - s=g_convert(t,-1,testcase->encoding,"UTF-8",NULL,&n,&tmp_err);
8.9 - g_free(t);
8.10 + if (testcase->flags&TESTCASE_UNIX_NEWLINES)
8.11 + s=g_convert(input->contents,-1,testcase->encoding,"UTF-8",NULL,
8.12 + &n,&tmp_err);
8.13 + else
8.14 + {
8.15 + t=unix2dos(input->contents);
8.16 + s=g_convert(t,-1,testcase->encoding,"UTF-8",NULL,&n,&tmp_err);
8.17 + g_free(t);
8.18 + }
8.19 if (!s)
8.20 {
8.21 g_propagate_prefixed_error(error,tmp_err,
8.22 @@ -86,6 +92,11 @@
8.23 return FALSE;
8.24 }
8.25 }
8.26 + else if (testcase->flags&TESTCASE_UNIX_NEWLINES)
8.27 + {
8.28 + s=g_strdup(input->contents);
8.29 + n=strlen(s);
8.30 + }
8.31 else
8.32 {
8.33 s=unix2dos(input->contents);
9.1 --- a/test/harness/testcaseio.c Sun Sep 29 22:51:27 2013 +0100
9.2 +++ b/test/harness/testcaseio.c Mon Oct 21 23:36:40 2013 +0100
9.3 @@ -23,7 +23,7 @@
9.4 GError *err=NULL;
9.5 char *s,*arg;
9.6 const char *tag,*text;
9.7 - gboolean found_tag=FALSE;
9.8 + gboolean found_tag=FALSE,newlines_set=FALSE;
9.9 parser=testcase_parser_new_from_file(filename);
9.10 if (!parser)
9.11 return NULL;
9.12 @@ -108,6 +108,24 @@
9.13 }
9.14 else if (!testcase->encoding && !strcmp(tag,"ENCODING"))
9.15 testcase->encoding=g_strchomp(g_strdup(text));
9.16 + else if (!newlines_set && !strcmp(tag,"NEWLINES"))
9.17 + {
9.18 + newlines_set=TRUE;
9.19 + s=g_strdup(text);
9.20 + g_strchomp(s);
9.21 + if (!strcmp(s,"LF"))
9.22 + testcase->flags|=TESTCASE_UNIX_NEWLINES;
9.23 + else if (strcmp(s,"CRLF"))
9.24 + {
9.25 + g_printerr(
9.26 + "%s: Unrecognised style for newlines. Try LF or CRLF.\n",s);
9.27 + g_free(s);
9.28 + testcase_free(testcase);
9.29 + testcase_parser_free(parser);
9.30 + return NULL;
9.31 + }
9.32 + g_free(s);
9.33 + }
9.34 else if (!testcase->options && !strcmp(tag,"OPTIONS"))
9.35 {
9.36 testcase->options=g_strsplit(text,"\n",0);
10.1 --- a/test/harness/warningsparser.c Sun Sep 29 22:51:27 2013 +0100
10.2 +++ b/test/harness/warningsparser.c Mon Oct 21 23:36:40 2013 +0100
10.3 @@ -15,11 +15,12 @@
10.4 enum {
10.5 WARNINGS_INIT,
10.6 WARNINGS_IN_EXPECTED,
10.7 + WARNINGS_IN_SUMMARY,
10.8 WARNINGS_IN_WARNING,
10.9 WARNINGS_IN_AT,
10.10 WARNINGS_IN_TEXT,
10.11 WARNINGS_DONE,
10.12 - } state;
10.13 + } state,parent_state;
10.14 } WarningsBaton;
10.15
10.16 static void warnings_parser_start_element(GMarkupParseContext *context,
10.17 @@ -30,6 +31,7 @@
10.18 guint64 tmp;
10.19 char *endp;
10.20 WarningsBaton *baton=user_data;
10.21 + baton->parent_state=baton->state;
10.22 switch(baton->state)
10.23 {
10.24 case WARNINGS_INIT:
10.25 @@ -45,20 +47,36 @@
10.26 baton->state=WARNINGS_IN_EXPECTED;
10.27 break;
10.28 case WARNINGS_IN_EXPECTED:
10.29 - baton->warning=g_new0(TestcaseWarning,1);
10.30 - if (!strcmp(element_name,"error"))
10.31 - baton->warning->is_real=TRUE;
10.32 - else if (!strcmp(element_name,"false-positive"))
10.33 - baton->warning->xfail=TRUE;
10.34 - else if (!strcmp(element_name,"false-negative"))
10.35 - baton->warning->is_real=baton->warning->xfail=TRUE;
10.36 + if (!strcmp(element_name,"summary"))
10.37 + {
10.38 + if (baton->testcase->summary.texts)
10.39 + {
10.40 + g_set_error(error,G_MARKUP_ERROR,
10.41 + G_MARKUP_ERROR_INVALID_CONTENT,"Multiple summary "
10.42 + "elements are not valid");
10.43 + }
10.44 + else
10.45 + baton->state=WARNINGS_IN_SUMMARY;
10.46 + }
10.47 else
10.48 {
10.49 - g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_UNKNOWN_ELEMENT,
10.50 - "Unknown element in 'expected': '%s'",element_name);
10.51 - g_free(baton->warning);
10.52 - baton->warning=NULL;
10.53 - return;
10.54 + baton->warning=g_new0(TestcaseWarning,1);
10.55 + if (!strcmp(element_name,"error"))
10.56 + baton->warning->is_real=TRUE;
10.57 + else if (!strcmp(element_name,"false-positive"))
10.58 + baton->warning->xfail=TRUE;
10.59 + else if (!strcmp(element_name,"false-negative"))
10.60 + baton->warning->is_real=baton->warning->xfail=TRUE;
10.61 + else
10.62 + {
10.63 + g_set_error(error,G_MARKUP_ERROR,
10.64 + G_MARKUP_ERROR_UNKNOWN_ELEMENT,
10.65 + "Unknown element in 'expected': '%s'",element_name);
10.66 + g_free(baton->warning);
10.67 + baton->warning=NULL;
10.68 + return;
10.69 + }
10.70 + baton->state=WARNINGS_IN_WARNING;
10.71 }
10.72 if (attribute_names[0])
10.73 {
10.74 @@ -66,12 +84,28 @@
10.75 G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
10.76 "Unknown attribute on element '%s': '%s'",element_name,
10.77 attribute_names[0]);
10.78 - g_free(baton->warning);
10.79 - baton->warning=NULL;
10.80 + if (baton->state==WARNINGS_IN_WARNING)
10.81 + {
10.82 + g_free(baton->warning);
10.83 + baton->warning=NULL;
10.84 + }
10.85 + baton->state=WARNINGS_IN_EXPECTED;
10.86 return;
10.87 }
10.88 - else
10.89 - baton->state=WARNINGS_IN_WARNING;
10.90 + break;
10.91 + case WARNINGS_IN_SUMMARY:
10.92 + if (!strcmp(element_name,"text"))
10.93 + {
10.94 + if (attribute_names[0])
10.95 + {
10.96 + g_set_error(error,G_MARKUP_ERROR,
10.97 + G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
10.98 + "Unknown attribute on element 'text': '%s'",
10.99 + attribute_names[0]);
10.100 + return;
10.101 + }
10.102 + baton->state=WARNINGS_IN_TEXT;
10.103 + }
10.104 break;
10.105 case WARNINGS_IN_WARNING:
10.106 if (!strcmp(element_name,"at"))
10.107 @@ -162,6 +196,15 @@
10.108 g_slist_reverse(baton->testcase->warnings);
10.109 baton->state=WARNINGS_DONE;
10.110 break;
10.111 + case WARNINGS_IN_SUMMARY:
10.112 + if (!baton->testcase->summary.texts)
10.113 + g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_INVALID_CONTENT,
10.114 + "Summary element must contain at least one text element");
10.115 + else
10.116 + baton->testcase->summary.texts=
10.117 + g_slist_reverse(baton->testcase->summary.texts);
10.118 + baton->state=WARNINGS_IN_EXPECTED;
10.119 + break;
10.120 case WARNINGS_IN_WARNING:
10.121 baton->warning->locations=
10.122 g_slist_reverse(baton->warning->locations);
10.123 @@ -177,7 +220,7 @@
10.124 baton->state=WARNINGS_IN_WARNING;
10.125 break;
10.126 case WARNINGS_IN_TEXT:
10.127 - baton->state=WARNINGS_IN_WARNING;
10.128 + baton->state=baton->parent_state;
10.129 break;
10.130 default:
10.131 g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_UNKNOWN_ELEMENT,
10.132 @@ -198,6 +241,11 @@
10.133 g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_INVALID_CONTENT,
10.134 "The 'expected' tag does not take any content");
10.135 break;
10.136 + case WARNINGS_IN_SUMMARY:
10.137 + if (strspn(text," \t\n")!=text_len)
10.138 + g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_INVALID_CONTENT,
10.139 + "The summary tags do not take any content");
10.140 + break;
10.141 case WARNINGS_IN_WARNING:
10.142 if (strspn(text," \t\n")!=text_len)
10.143 g_set_error(error,G_MARKUP_ERROR,G_MARKUP_ERROR_INVALID_CONTENT,
10.144 @@ -211,7 +259,10 @@
10.145 case WARNINGS_IN_TEXT:
10.146 s=g_strdup(text+strspn(text," \t\n"));
10.147 g_strchomp(s);
10.148 - if (baton->warning->text)
10.149 + if (baton->parent_state==WARNINGS_IN_SUMMARY)
10.150 + baton->testcase->summary.texts=
10.151 + g_slist_prepend(baton->testcase->summary.texts,s);
10.152 + else if (baton->warning->text)
10.153 {
10.154 t=g_strconcat(baton->warning->text,s,NULL);
10.155 g_free(baton->warning->text);
10.156 @@ -237,6 +288,7 @@
10.157 parser.text=warnings_parser_text;
10.158 baton=g_new0(WarningsBaton,1);
10.159 baton->testcase=testcase;
10.160 + baton->parent_state=WARNINGS_INIT;
10.161 baton->state=WARNINGS_INIT;
10.162 return g_markup_parse_context_new(&parser,
10.163 G_MARKUP_TREAT_CDATA_AS_TEXT|G_MARKUP_PREFIX_ERROR_POSITION,