1.1 --- a/bookloupe/bookloupe.c Wed Oct 02 23:51:18 2013 +0100
1.2 +++ b/bookloupe/bookloupe.c Thu Oct 03 16:09:39 2013 +0100
1.3 @@ -431,6 +431,40 @@
1.4 return 0;
1.5 }
1.6
1.7 +/* Tally in <results> whether <line> contains <dash> and how it is spaced.
1.8 + * Each counter rises at most once per call, however many dashes there are. */
1.9 +void count_dashes(const char *line,const char *dash,struct dash_results *results)
1.10 +{
1.11 +    int i;
1.12 +    gchar **tokens;
1.13 +    gunichar pc,nc;
1.14 +    gboolean spaced=FALSE,unspaced=FALSE,spaced2=FALSE;
1.15 +    if (!*line) return; /* splitting "" yields no tokens at all */
1.16 +    tokens=g_strsplit(line,dash,0);
1.17 +    if (tokens[1])
1.18 +        results->base++;
1.19 +    for(i=1;tokens[i];i++)
1.20 +    {
1.21 +        /* An empty token has no last character to step back to; use NUL. */
1.22 +        pc=*tokens[i-1]?g_utf8_get_char(g_utf8_prev_char(
1.23 +          tokens[i-1]+strlen(tokens[i-1]))):0;
1.24 +        nc=g_utf8_get_char(tokens[i]);
1.25 +        if (g_unichar_isspace(pc) || g_unichar_isspace(nc))
1.26 +            spaced=TRUE;
1.27 +        if (g_unichar_isspace(pc) && g_unichar_isspace(nc))
1.28 +            spaced2=TRUE;
1.29 +        else if (!g_unichar_isspace(pc) && !g_unichar_isspace(nc))
1.30 +            unspaced=TRUE;
1.31 +    }
1.32 +    if (spaced)
1.33 +        results->space++;
1.34 +    if (spaced2) /* a dash on this line has spaces on both sides */
1.35 +        results->non_PG_space++;
1.36 +    if (unspaced) /* a dash on this line is PG-type, with no spaces */
1.37 +        results->PG_space++;
1.38 +    g_strfreev(tokens);
1.39 +}
1.40 +
1.41 /*
1.42 * first_pass:
1.43 *
1.44 @@ -449,6 +483,7 @@
1.45 unsigned int lastlen=0,lastblen=0;
1.46 long spline=0,nspline=0;
1.47 static struct first_pass_results results={0};
1.48 + struct dash_results tmp_dash_results;
1.49 gchar *inword;
1.50 QuoteClass qc;
1.51 lines=g_strsplit(etext,"\n",0);
1.52 @@ -512,7 +547,7 @@
1.53 else
1.54 qc=INVALID_QUOTE;
1.55 if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) &&
1.56 - isalpha(g_utf8_get_char(g_utf8_prev_char(s))))
1.57 + g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(s))))
1.58 results.endquote_count++;
1.59 }
1.60 }
1.61 @@ -559,18 +594,15 @@
1.62 results.htmcount+=4; /* bonus marks! */
1.63 }
1.64 /* Check for spaced em-dashes */
1.65 - if (lines[j][0] && (s=strstr(g_utf8_next_char(lines[j]),"--")))
1.66 - {
1.67 - results.emdash++;
1.68 - if (s[-1]==CHAR_SPACE || s[2]==CHAR_SPACE)
1.69 - results.space_emdash++;
1.70 - if (s[-1]==CHAR_SPACE && s[2]==CHAR_SPACE)
1.71 - /* count of em-dashes with spaces both sides */
1.72 - results.non_PG_space_emdash++;
1.73 - if (s[-1]!=CHAR_SPACE && s[2]!=CHAR_SPACE)
1.74 - /* count of PG-type em-dashes with no spaces */
1.75 - results.PG_space_emdash++;
1.76 - }
1.77 + memset(&tmp_dash_results,0,sizeof(tmp_dash_results));
1.78 + count_dashes(lines[j],"--",&tmp_dash_results);
1.79 + count_dashes(lines[j],"—",&tmp_dash_results);
1.80 + if (tmp_dash_results.base)
1.81 + results.emdash.base++;
1.82 + if (tmp_dash_results.non_PG_space)
1.83 + results.emdash.non_PG_space++;
1.84 + if (tmp_dash_results.PG_space)
1.85 + results.emdash.PG_space++;
1.86 for (s=lines[j];*s;)
1.87 {
1.88 inword=getaword(&s);
1.89 @@ -700,13 +732,13 @@
1.90 * and some people insist on them whatever the guidelines say.
1.91 */
1.92 warnings.dash=1;
1.93 - if (results->spacedash+results->non_PG_space_emdash>
1.94 - results->PG_space_emdash)
1.95 + if (results->spacedash+results->emdash.non_PG_space>
1.96 + results->emdash.PG_space)
1.97 {
1.98 warnings.dash=0;
1.99 g_print(" --> There are %ld spaced dashes and em-dashes. "
1.100 "Not reporting them.\n",
1.101 - results->spacedash+results->non_PG_space_emdash);
1.102 + results->spacedash+results->emdash.non_PG_space);
1.103 }
1.104 /* If more than a quarter of characters are hi-bit, bug out. */
1.105 warnings.bin=1;
1.106 @@ -1120,20 +1152,53 @@
1.107 }
1.108
1.109 /*
1.110 + * str_emdash:
1.111 + *
1.112 + * Find the first em-dash ("--" or "—"); return a pointer to it (or NULL)
1.113 + * and, when one is found, set <next> to the character following it.
1.114 + */
1.115 +char *str_emdash(const char *s,const char **next)
1.116 +{
1.117 +    const char *hyphens,*emdash;
1.118 +    const char *found;
1.119 +    hyphens=strstr(s,"--");
1.120 +    emdash=strstr(s,"—");
1.121 +    /*
1.122 +     * Take the double hyphen only when it exists and no em-dash
1.123 +     * character starts before it; otherwise fall back to the em-dash
1.124 +     * character, which may itself be absent.
1.125 +     */
1.126 +    if (hyphens && (!emdash || hyphens<emdash))
1.127 +    {
1.128 +        found=hyphens;
1.129 +        /* "--" is two characters long, so step over both. */
1.130 +        *next=g_utf8_next_char(g_utf8_next_char(found));
1.131 +    }
1.132 +    else
1.133 +    {
1.134 +        found=emdash;
1.135 +        /* <next> is left untouched when nothing was found. */
1.136 +        if (found)
1.137 +            *next=g_utf8_next_char(found);
1.138 +    }
1.139 +    /* The cast drops const from the pointer into <s>. */
1.140 +    return (char *)found;
1.141 +}
1.142 +
1.143 +/*
1.144 * check_for_spaced_emdash:
1.145 *
1.146 * Check for spaced em-dashes.
1.147 *
1.148 - * We must check _all_ occurrences of "--" on the line
1.149 - * hence the loop - even if the first double-dash is OK
1.150 + * We must check _all_ occurrences of em-dashes on the line
1.151 + * hence the loop - even if the first dash is OK
1.152 * there may be another that's wrong later on.
1.153 */
1.154 void check_for_spaced_emdash(const char *aline)
1.155 {
1.156 const char *s,*t,*next;
1.157 - for (s=aline;t=strstr(s,"--");s=next)
1.158 + for (s=aline;t=str_emdash(s,&next);s=next)
1.159 {
1.160 - next=g_utf8_next_char(g_utf8_next_char(t));
1.161 if (t>aline && g_utf8_get_char(g_utf8_prev_char(t))==CHAR_SPACE ||
1.162 g_utf8_get_char(next)==CHAR_SPACE)
1.163 {
1.164 @@ -2322,7 +2387,7 @@
1.165 qc=CHAR_IS_DQUOTE(c)?QUOTE_CLASS(c):INVALID_QUOTE;
1.166 nc=g_utf8_get_char(g_utf8_next_char(s));
1.167 /* for each character in the line except 1st */
1.168 - if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) && isalpha(pc))
1.169 + if ((qc==CLOSING_QUOTE || qc==NEUTRAL_QUOTE) && g_unichar_isalpha(pc))
1.170 {
1.171 if (pswit[ECHO_SWITCH])
1.172 g_print("\n%s\n",aline);
1.173 @@ -2466,7 +2531,7 @@
1.174 cnt_punct++;
1.175 break;
1.176 }
1.177 - if (g_utf8_strchr("-.:!([{?}])",-1,g_utf8_get_char(s)))
1.178 + if (g_utf8_strchr("—-.:!([{?}])",-1,g_utf8_get_char(s)))
1.179 break;
1.180 }
1.181 }