1.1 --- a/bookloupe/bookloupe.c Tue Oct 15 09:16:04 2013 +0100
1.2 +++ b/bookloupe/bookloupe.c Sat Oct 26 18:47:33 2013 +0100
1.3 @@ -183,7 +183,7 @@
1.4
1.5 gboolean mixdigit(const char *);
1.6 gchar *getaword(const char **);
1.7 -char *flgets(char **,long);
1.8 +char *flgets(char **,long,int);
1.9 void postprocess_for_HTML(char *);
1.10 char *linehasmarkup(char *);
1.11 char *losemarkup(char *);
1.12 @@ -487,11 +487,40 @@
1.13 gchar *inword;
1.14 QuoteClass qc;
1.15 lines=g_strsplit(etext,"\n",0);
1.16 + if (!lines[0])
1.17 + {
1.18 + /* An empty etext has no terminators; treat it as DOS-style (CR-LF) */
1.19 + results.newlines=DOS_NEWLINES;
1.20 + }
1.21 + else if (!lines[1])
1.22 + {
1.23 + /*
1.24 + * If there are no LFs, we don't have UNIX-style
1.25 + * terminators, but we might have OS9-style ones.
1.26 + */
1.27 + results.newlines=OS9_NEWLINES;
1.28 + g_strfreev(lines);
1.29 + lines=g_strsplit(etext,"\r",0);
1.30 + if (!lines[0] || !lines[1])
1.31 + /* Looks like we don't have any terminators at all */
1.32 + results.newlines=DOS_NEWLINES;
1.33 + }
1.34 + else
1.35 + {
1.36 + /* We might have UNIX-style terminators */
1.37 + results.newlines=UNIX_NEWLINES;
1.38 + }
1.39 for (j=0;lines[j];j++)
1.40 {
1.41 lbytes=strlen(lines[j]);
1.42 - while (lbytes>0 && lines[j][lbytes-1]=='\r')
1.43 - lines[j][--lbytes]='\0';
1.44 + if (lbytes>0 && lines[j][lbytes-1]=='\r')
1.45 + {
1.46 + results.newlines=DOS_NEWLINES;
1.47 + do
1.48 + {
1.49 + lines[j][--lbytes]='\0';
1.50 + } while (lbytes>0 && lines[j][lbytes-1]=='\r');
1.51 + }
1.52 llen=g_utf8_strlen(lines[j],lbytes);
1.53 linecnt++;
1.54 if (strstr(lines[j],"*END") && strstr(lines[j],"SMALL PRINT") &&
1.55 @@ -633,6 +662,13 @@
1.56 struct warnings *report_first_pass(struct first_pass_results *results)
1.57 {
1.58 static struct warnings warnings={0};
1.59 + warnings.newlines=results->newlines;
1.60 + if (warnings.newlines==UNIX_NEWLINES)
1.61 + g_print(" --> No lines in this file have a CR. Not reporting them. "
1.62 + "Project Gutenberg requires that all lineends be CR-LF.\n");
1.63 + else if (warnings.newlines==OS9_NEWLINES)
1.64 + g_print(" --> No lines in this file have a LF. Not reporting them. "
1.65 + "Project Gutenberg requires that all lineends be CR-LF.\n");
1.66 if (cnt_spacend>0)
1.67 g_print(" --> %ld lines in this file have white space at end\n",
1.68 cnt_spacend);
1.69 @@ -2621,7 +2657,7 @@
1.70 */
1.71 linecnt=0;
1.72 etext_ptr=etext;
1.73 - while ((aline=flgets(&etext_ptr,linecnt+1)))
1.74 + while ((aline=flgets(&etext_ptr,linecnt+1,warnings->newlines)))
1.75 {
1.76 linecnt++;
1.77 if (linecnt==1)
1.78 @@ -2762,12 +2798,21 @@
1.79 /*
1.80 * flgets:
1.81 *
1.82 - * Get one line from the input text, checking for
1.83 - * the existence of exactly one CR/LF line-end per line.
1.84 + * Get one line from the input text. The setting of newlines has the following
1.85 + * effect:
1.86 + *
1.87 + * DOS_NEWLINES: Check for the existence of exactly one CR-LF line-end per line.
1.88 + *
1.89 + * OS9_NEWLINES: Asserts that etext contains no LFs. CR is used as
1.90 + * the newline character.
1.91 + *
1.92 + * UNIX_NEWLINES: Check for the presence of CRs.
1.93 + *
1.94 + * In all cases, check that the last line is correctly terminated.
1.95 *
1.96 * Returns: a pointer to the line.
1.97 */
1.98 -char *flgets(char **etext,long lcnt)
1.99 +char *flgets(char **etext,long lcnt,int newlines)
1.100 {
1.101 gunichar c;
1.102 gboolean isCR=FALSE;
1.103 @@ -2790,8 +2835,15 @@
1.104 g_free(s);
1.105 }
1.106 if (!pswit[OVERVIEW_SWITCH])
1.107 - /* There may, or may not, have been a CR */
1.108 - g_print(" Line %ld - No LF?\n",lcnt);
1.109 + {
1.110 + if (newlines==OS9_NEWLINES)
1.111 + g_print(" Line %ld - No CR?\n",lcnt);
1.112 + else
1.113 + {
1.114 + /* There may, or may not, have been a CR */
1.115 + g_print(" Line %ld - No LF?\n",lcnt);
1.116 + }
1.117 + }
1.118 else
1.119 cnt_lineend++;
1.120 }
1.121 @@ -2801,9 +2853,7 @@
1.122 /* either way, it's end of line */
1.123 if (c=='\n')
1.124 {
1.125 - if (isCR)
1.126 - break;
1.127 - else
1.128 + if (newlines==DOS_NEWLINES && !isCR)
1.129 {
1.130 /* Error - a LF without a preceding CR */
1.131 if (pswit[LINE_END_SWITCH])
1.132 @@ -2819,14 +2869,15 @@
1.133 else
1.134 cnt_lineend++;
1.135 }
1.136 - break;
1.137 }
1.138 + break;
1.139 }
1.140 if (c=='\r')
1.141 {
1.142 - if (isCR)
1.143 + if (newlines==OS9_NEWLINES)
1.144 + break;
1.145 + if (isCR || newlines==UNIX_NEWLINES)
1.146 {
1.147 - /* Error - two successive CRs */
1.148 if (pswit[LINE_END_SWITCH])
1.149 {
1.150 if (pswit[ECHO_SWITCH])
1.151 @@ -2836,12 +2887,22 @@
1.152 g_free(s);
1.153 }
1.154 if (!pswit[OVERVIEW_SWITCH])
1.155 - g_print(" Line %ld - Two successive CRs?\n",lcnt);
1.156 + {
1.157 + if (newlines==UNIX_NEWLINES)
1.158 + g_print(" Line %ld column %ld - Embedded CR?\n",
1.159 + lcnt,g_utf8_pointer_to_offset(theline,eos)+1);
1.160 + else
1.161 + g_print(" Line %ld - Two successive CRs?\n",
1.162 + lcnt);
1.163 + }
1.164 else
1.165 cnt_lineend++;
1.166 }
1.167 + if (newlines==UNIX_NEWLINES)
1.168 + *eos=' ';
1.169 }
1.170 - isCR=TRUE;
1.171 + if (newlines==DOS_NEWLINES)
1.172 + isCR=TRUE;
1.173 }
1.174 else
1.175 {