diff -r ad92d11d59b8 -r f44c530f80da bookloupe/bookloupe.c --- a/bookloupe/bookloupe.c Tue Oct 15 09:16:04 2013 +0100 +++ b/bookloupe/bookloupe.c Sat Oct 26 18:47:33 2013 +0100 @@ -183,7 +183,7 @@ gboolean mixdigit(const char *); gchar *getaword(const char **); -char *flgets(char **,long); +char *flgets(char **,long,int); void postprocess_for_HTML(char *); char *linehasmarkup(char *); char *losemarkup(char *); @@ -487,11 +487,40 @@ gchar *inword; QuoteClass qc; lines=g_strsplit(etext,"\n",0); + if (!lines[0]) + { + /* An empty etext has no terminators */ + results.newlines=DOS_NEWLINES; + } + else if (!lines[1]) + { + /* + * If there are no LFs, we don't have UNIX-style + * terminators, but we might have OS9-style ones. + */ + results.newlines=OS9_NEWLINES; + g_strfreev(lines); + lines=g_strsplit(etext,"\r",0); + if (!lines[0] || !lines[1]) + /* Looks like we don't have any terminators at all */ + results.newlines=DOS_NEWLINES; + } + else + { + /* We might have UNIX-style terminators */ + results.newlines=UNIX_NEWLINES; + } for (j=0;lines[j];j++) { lbytes=strlen(lines[j]); - while (lbytes>0 && lines[j][lbytes-1]=='\r') - lines[j][--lbytes]='\0'; + if (lbytes>0 && lines[j][lbytes-1]=='\r') + { + results.newlines=DOS_NEWLINES; + do + { + lines[j][--lbytes]='\0'; + } while (lbytes>0 && lines[j][lbytes-1]=='\r'); + } llen=g_utf8_strlen(lines[j],lbytes); linecnt++; if (strstr(lines[j],"*END") && strstr(lines[j],"SMALL PRINT") && @@ -633,6 +662,13 @@ struct warnings *report_first_pass(struct first_pass_results *results) { static struct warnings warnings={0}; + warnings.newlines=results->newlines; + if (warnings.newlines==UNIX_NEWLINES) + g_print(" --> No lines in this file have a CR. Not reporting them. " + "Project Gutenberg requires that all lineends be CR-LF.\n"); + else if (warnings.newlines==OS9_NEWLINES) + g_print(" --> No lines in this file have a LF. Not reporting them. " + "Project Gutenberg requires that all lineends be CR-LF.\n"); if (cnt_spacend>0) g_print(" --> %ld lines in this file have white space at end\n", cnt_spacend); @@ -2621,7 +2657,7 @@ */ linecnt=0; etext_ptr=etext; - while ((aline=flgets(&etext_ptr,linecnt+1))) + while ((aline=flgets(&etext_ptr,linecnt+1,warnings->newlines))) { linecnt++; if (linecnt==1) @@ -2762,12 +2798,21 @@ /* * flgets: * - * Get one line from the input text, checking for - * the existence of exactly one CR/LF line-end per line. + * Get one line from the input text. The setting of newlines has the following + * effect: + * + * DOS_NEWLINES: Check for the existence of exactly one CR-LF line-end per line. + * + * OS9_NEWLINES: Asserts that etext contains no LFs. CR is used as + * the newline character. + * + * UNIX_NEWLINES: Check for the presence of CRs. + * + * In all cases, check that the last line is correctly terminated. * * Returns: a pointer to the line. */ -char *flgets(char **etext,long lcnt) +char *flgets(char **etext,long lcnt,int newlines) { gunichar c; gboolean isCR=FALSE; @@ -2790,8 +2835,15 @@ g_free(s); } if (!pswit[OVERVIEW_SWITCH]) - /* There may, or may not, have been a CR */ - g_print(" Line %ld - No LF?\n",lcnt); + { + if (newlines==OS9_NEWLINES) + g_print(" Line %ld - No CR?\n",lcnt); + else + { + /* There may, or may not, have been a CR */ + g_print(" Line %ld - No LF?\n",lcnt); + } + } else cnt_lineend++; } @@ -2801,9 +2853,7 @@ /* either way, it's end of line */ if (c=='\n') { - if (isCR) - break; - else + if (newlines==DOS_NEWLINES && !isCR) { /* Error - a LF without a preceding CR */ if (pswit[LINE_END_SWITCH]) @@ -2819,14 +2869,15 @@ else cnt_lineend++; } - break; } + break; } if (c=='\r') { - if (isCR) + if (newlines==OS9_NEWLINES) + break; + if (isCR || newlines==UNIX_NEWLINES) { - /* Error - two successive CRs */ if (pswit[LINE_END_SWITCH]) { if (pswit[ECHO_SWITCH]) @@ -2836,12 +2887,22 @@ g_free(s); } if (!pswit[OVERVIEW_SWITCH]) - g_print(" Line %ld - Two successive CRs?\n",lcnt); + { + if (newlines==UNIX_NEWLINES) + g_print(" Line %ld column %ld - Embedded CR?\n", + lcnt,g_utf8_pointer_to_offset(theline,eos)+1); + else + g_print(" Line %ld - Two successive CRs?\n", + lcnt); + } else cnt_lineend++; } + if (newlines==UNIX_NEWLINES) + *eos=' '; } - isCR=TRUE; + if (newlines==DOS_NEWLINES) + isCR=TRUE; } else {