bookloupe/bookloupe.c
changeset 99 7a62c77a0dbe
parent 86 c42c068d2996
child 103 adc06e9e8470
     1.1 --- a/bookloupe/bookloupe.c	Sat Sep 07 08:38:13 2013 +0100
     1.2 +++ b/bookloupe/bookloupe.c	Sat Sep 21 23:40:18 2013 +0100
     1.3 @@ -27,6 +27,8 @@
     1.4  #endif
     1.5  #include <glib.h>
     1.6  #include <bl/bl.h>
     1.7 +#include "bookloupe.h"
     1.8 +#include "counters.h"
     1.9  #include "HTMLentities.h"
    1.10  
    1.11  gchar *prevline;
    1.12 @@ -123,50 +125,6 @@
    1.13      "among", "those", "into", "whom", "having", "thence", ""
    1.14  }; 
    1.15  
    1.16 -/* special characters */
    1.17 -#define CHAR_SPACE	  32
    1.18 -#define CHAR_TAB	   9
    1.19 -#define CHAR_LF		  10
    1.20 -#define CHAR_CR		  13
    1.21 -#define CHAR_DQUOTE	  34
    1.22 -#define CHAR_SQUOTE	  39
    1.23 -#define CHAR_OPEN_SQUOTE  96
    1.24 -#define CHAR_TILDE	 126
    1.25 -#define CHAR_ASTERISK	  42
    1.26 -#define CHAR_FORESLASH	  47
    1.27 -#define CHAR_CARAT	  94
    1.28 -
    1.29 -#define CHAR_UNDERSCORE    '_'
    1.30 -#define CHAR_OPEN_CBRACK   '{'
    1.31 -#define CHAR_CLOSE_CBRACK  '}'
    1.32 -#define CHAR_OPEN_RBRACK   '('
    1.33 -#define CHAR_CLOSE_RBRACK  ')'
    1.34 -#define CHAR_OPEN_SBRACK   '['
    1.35 -#define CHAR_CLOSE_SBRACK  ']'
    1.36 -
    1.37 -/* longest and shortest normal PG line lengths */
    1.38 -#define LONGEST_PG_LINE   75
    1.39 -#define WAY_TOO_LONG      80
    1.40 -#define SHORTEST_PG_LINE  55
    1.41 -
    1.42 -enum {
    1.43 -    ECHO_SWITCH,
    1.44 -    SQUOTE_SWITCH,
    1.45 -    TYPO_SWITCH,
    1.46 -    QPARA_SWITCH,
    1.47 -    PARANOID_SWITCH,
    1.48 -    LINE_END_SWITCH,
    1.49 -    OVERVIEW_SWITCH,
    1.50 -    STDOUT_SWITCH,
    1.51 -    HEADER_SWITCH,
    1.52 -    WEB_SWITCH,
    1.53 -    VERBOSE_SWITCH,
    1.54 -    MARKUP_SWITCH,
    1.55 -    USERTYPO_SWITCH,
    1.56 -    DP_SWITCH,
    1.57 -    SWITNO
    1.58 -};
    1.59 -
    1.60  gboolean pswit[SWITNO];  /* program switches */
    1.61  
    1.62  static GOptionEntry options[]={
    1.63 @@ -242,40 +200,6 @@
    1.64  UINT saved_cp;
    1.65  #endif
    1.66  
    1.67 -struct first_pass_results {
    1.68 -    long firstline,astline;
    1.69 -    long footerline,totlen,binlen,alphalen,endquote_count,shortline,dotcomma;
    1.70 -    long fslashline,hyphens,longline,verylongline,htmcount,standalone_digit;
    1.71 -    long spacedash,emdash,space_emdash,non_PG_space_emdash,PG_space_emdash;
    1.72 -    int Dutchcount,Frenchcount;
    1.73 -};
    1.74 -
    1.75 -struct warnings {
    1.76 -    int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen;
    1.77 -    int endquote;
    1.78 -    gboolean isDutch,isFrench;
    1.79 -};
    1.80 -
    1.81 -struct counters {
    1.82 -    long quot;
    1.83 -    int c_unders,c_brack,s_brack,r_brack;
    1.84 -    int open_single_quote,close_single_quote;
    1.85 -};
    1.86 -
    1.87 -struct line_properties {
    1.88 -    unsigned int len,blen;
    1.89 -    gunichar start;
    1.90 -};
    1.91 -
    1.92 -struct parities {
    1.93 -    int dquote,squote;
    1.94 -};
    1.95 -
    1.96 -struct pending {
    1.97 -    char *dquote,*squote,*rbrack,*sbrack,*cbrack,*unders;
    1.98 -    long squot;
    1.99 -};
   1.100 -
   1.101  void parse_options(int *argc,char ***argv)
   1.102  {
   1.103      GError *err=NULL;
   1.104 @@ -877,7 +801,7 @@
   1.105  	c=g_utf8_get_char(s);
   1.106  	if (c==CHAR_DQUOTE)
   1.107  	    counters->quot++;
   1.108 -	if (c==CHAR_SQUOTE || c==CHAR_OPEN_SQUOTE)
   1.109 +	if (CHAR_IS_SQUOTE(c))
   1.110  	{
   1.111  	    if (s==aline)
   1.112  	    {
   1.113 @@ -887,21 +811,21 @@
   1.114  		 */
   1.115  		if (!g_str_has_prefix(snext,"tis") &&
   1.116  		  !g_str_has_prefix(snext,"Tis"))
   1.117 -		    counters->open_single_quote++;
   1.118 +		    increment_matching(counters,c,TRUE);
   1.119  	    }
   1.120  	    else if (g_unichar_isalpha(g_utf8_get_char(sprev)) &&
   1.121  	      g_unichar_isalpha(g_utf8_get_char(snext)))
   1.122  		/* Do nothing! it's definitely an apostrophe, not a quote */
   1.123  		;
   1.124  	    /* it's outside a word - let's check it out */
   1.125 -	    else if (c==CHAR_OPEN_SQUOTE ||
   1.126 +	    else if (c==CHAR_OPEN_SQUOTE || c==CHAR_LS_QUOTE ||
   1.127  	      g_unichar_isalpha(g_utf8_get_char(snext)))
   1.128  	    {
   1.129  		/* it damwell better BE an openquote */
   1.130  		if (!g_str_has_prefix(snext,"tis") &&
   1.131  		  !g_str_has_prefix(snext,"Tis"))
   1.132  		    /* hardcode a very common exception! */
   1.133 -		    counters->open_single_quote++;
   1.134 +		    increment_matching(counters,c,TRUE);
   1.135  	    }
   1.136  	    else
   1.137  	    {
   1.138 @@ -926,7 +850,7 @@
   1.139  		    guessquote+=8; /* looks like a closequote */
   1.140  		else
   1.141  		    guessquote++;
   1.142 -		if (counters->open_single_quote>counters->close_single_quote)
   1.143 +		if (matching_difference(counters,CHAR_SQUOTE)>0)
   1.144  		    /*
   1.145  		     * Give it the benefit of some doubt,
   1.146  		     * if a squote is already open.
   1.147 @@ -935,7 +859,7 @@
   1.148  		else
   1.149  		    guessquote--;
   1.150  		if (guessquote>=0)
   1.151 -		    counters->close_single_quote++;
   1.152 +		    increment_matching(counters,c,FALSE);
   1.153  	    }
   1.154  	}
   1.155  	if (c!=CHAR_SPACE && c!='-' && c!='.' && c!=CHAR_ASTERISK &&
   1.156 @@ -943,18 +867,11 @@
   1.157  	    isemptyline=FALSE;  /* ignore lines like  *  *  *  as spacers */
   1.158  	if (c==CHAR_UNDERSCORE)
   1.159  	    counters->c_unders++;
   1.160 -	if (c==CHAR_OPEN_CBRACK)
   1.161 -	    counters->c_brack++;
   1.162 -	if (c==CHAR_CLOSE_CBRACK)
   1.163 -	    counters->c_brack--;
   1.164 -	if (c==CHAR_OPEN_RBRACK)
   1.165 -	    counters->r_brack++;
   1.166 -	if (c==CHAR_CLOSE_RBRACK)
   1.167 -	    counters->r_brack--;
   1.168 -	if (c==CHAR_OPEN_SBRACK)
   1.169 -	    counters->s_brack++;
   1.170 -	if (c==CHAR_CLOSE_SBRACK)
   1.171 -	    counters->s_brack--;
   1.172 +	if (c==CHAR_OPEN_CBRACK || c==CHAR_OPEN_RBRACK || c==CHAR_OPEN_SBRACK)
   1.173 +	    increment_matching(counters,c,TRUE);
   1.174 +	if (c==CHAR_CLOSE_CBRACK || c==CHAR_CLOSE_RBRACK ||
   1.175 +	  c==CHAR_CLOSE_SBRACK)
   1.176 +	    increment_matching(counters,c,FALSE);
   1.177  	sprev=s;
   1.178  	s=snext;
   1.179      }
   1.180 @@ -1423,12 +1340,12 @@
   1.181   */
   1.182  void check_for_extra_period(const char *aline,const struct warnings *warnings)
   1.183  {
   1.184 -    const char *s,*t,*s1;
   1.185 +    const char *s,*t,*s1,*sprev;
   1.186      int i;
   1.187      gsize len;
   1.188      gboolean istypo;
   1.189      gchar *testword;
   1.190 -    gunichar *decomposition;
   1.191 +    gunichar c,nc,pc,*decomposition;
   1.192      if (pswit[PARANOID_SWITCH])
   1.193      {
   1.194  	for (t=aline;t=strstr(t,". ");)
   1.195 @@ -1452,8 +1369,9 @@
   1.196  		c3=g_utf8_get_char(g_utf8_offset_to_pointer(t,3));
   1.197  		c4=g_utf8_get_char(g_utf8_offset_to_pointer(t,4));
   1.198  		c5=g_utf8_get_char(g_utf8_offset_to_pointer(t,5));
   1.199 -		if (c2==CHAR_SQUOTE && g_unichar_islower(c3) &&
   1.200 -		  c4==CHAR_SPACE && g_unichar_isupper(c5))
   1.201 +		if (CHAR_IS_APOSTROPHE(c2) &&
   1.202 +		  g_unichar_islower(c3) && c4==CHAR_SPACE &&
   1.203 +		  g_unichar_isupper(c5))
   1.204  		{
   1.205  		    t=g_utf8_next_char(t);
   1.206  		    continue;
   1.207 @@ -1468,14 +1386,22 @@
   1.208  		/* we have something to investigate */
   1.209  		istypo=TRUE;
   1.210  		/* so let's go back and find out */
   1.211 -		for (s1=g_utf8_prev_char(t);s1>=aline &&
   1.212 -		  (g_unichar_isalpha(g_utf8_get_char(s1)) ||
   1.213 -		  g_unichar_isdigit(g_utf8_get_char(s1)) ||
   1.214 -		  g_utf8_get_char(s1)==CHAR_SQUOTE &&
   1.215 -		  g_unichar_isalpha(g_utf8_get_char(g_utf8_next_char(s1))) &&
   1.216 -		  g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(s1))));
   1.217 -		  s1=g_utf8_prev_char(s1))
   1.218 -		    ;
   1.219 +		nc=g_utf8_get_char(t);
   1.220 +		s1=g_utf8_prev_char(t);
   1.221 +		c=g_utf8_get_char(s1);
   1.222 +		sprev=g_utf8_prev_char(s1);
   1.223 +		pc=g_utf8_get_char(sprev);
   1.224 +		while (s1>=aline &&
   1.225 +		  (g_unichar_isalpha(c) || g_unichar_isdigit(c) ||
   1.226 +		  g_unichar_isalpha(pc) && CHAR_IS_APOSTROPHE(c) &&
   1.227 +		  g_unichar_isalpha(nc)))
   1.228 +		{
   1.229 +		    nc=c;
   1.230 +		    s1=sprev;
   1.231 +		    c=pc;
   1.232 +		    sprev=g_utf8_prev_char(s1);
   1.233 +		    pc=g_utf8_get_char(sprev);
   1.234 +		}
   1.235  		s1=g_utf8_next_char(s1);
   1.236  		s=strchr(s1,'.');
   1.237  		if (s)
   1.238 @@ -1600,7 +1526,7 @@
   1.239      gchar *testword;
   1.240      int i,vowel,consonant,*dupcnt;
   1.241      gboolean isdup,istypo,alower;
   1.242 -    gunichar c;
   1.243 +    gunichar c,pc;
   1.244      long offset,len;
   1.245      gsize decomposition_len;
   1.246      for (s=aline;*s;)
   1.247 @@ -1646,11 +1572,14 @@
   1.248  		     *   French contractions like l'Abbe
   1.249  		     */
   1.250  		    offset=g_utf8_pointer_to_offset(inword,t);
   1.251 +		    if (offset>0)
   1.252 +			pc=g_utf8_get_char(g_utf8_prev_char(t));
   1.253 +		    else
   1.254 +			pc='\0';
   1.255  		    if (offset==2 && c=='m' && g_utf8_get_char(nt)=='c' ||
   1.256  		      offset==3 && c=='m' && g_utf8_get_char(nt)=='a' &&
   1.257  		      g_utf8_get_char(g_utf8_next_char(nt))=='c' ||
   1.258 -		      offset>0 &&
   1.259 -		      g_utf8_get_char(g_utf8_prev_char(t))==CHAR_SQUOTE)
   1.260 +		      CHAR_IS_APOSTROPHE(pc))
   1.261  			; /* do nothing! */
   1.262  		    else
   1.263  			istypo=TRUE;
   1.264 @@ -2050,8 +1979,7 @@
   1.265  	{
   1.266  	    c=nc;
   1.267  	    nc=g_utf8_get_char(g_utf8_next_char(s));
   1.268 -	    if ((c==CHAR_SQUOTE || c==CHAR_OPEN_SQUOTE) && (s==aline ||
   1.269 -	      s>aline &&
   1.270 +	    if (CHAR_IS_SQUOTE(c) && (s==aline || s>aline &&
   1.271  	      !g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(s))) ||
   1.272  	      !g_unichar_isalpha(nc)))
   1.273  	    {
   1.274 @@ -2166,7 +2094,11 @@
   1.275   */
   1.276  void check_for_spaced_quotes(const char *aline)
   1.277  {
   1.278 +    int i;
   1.279      const char *s,*t;
   1.280 +    const gunichar single_quotes[]={CHAR_SQUOTE,CHAR_OPEN_SQUOTE,CHAR_LS_QUOTE,
   1.281 +      CHAR_RS_QUOTE};
   1.282 +    GString *pattern;
   1.283      s=aline;
   1.284      while ((t=strstr(s," \" ")))
   1.285      {
   1.286 @@ -2179,30 +2111,26 @@
   1.287  	    cnt_punct++;
   1.288  	s=g_utf8_next_char(g_utf8_next_char(t));
   1.289      }
   1.290 -    s=aline;
   1.291 -    while ((t=strstr(s," ' ")))
   1.292 +    pattern=g_string_new(NULL);
   1.293 +    for(i=0;i<G_N_ELEMENTS(single_quotes);i++)
   1.294      {
   1.295 -	if (pswit[ECHO_SWITCH])
   1.296 -	    g_print("\n%s\n",aline);
   1.297 -	if (!pswit[OVERVIEW_SWITCH])
   1.298 -	    g_print("    Line %ld column %ld - Spaced singlequote?\n",
   1.299 -	      linecnt,g_utf8_pointer_to_offset(aline,t)+1);
   1.300 -	else
   1.301 -	    cnt_punct++;
   1.302 -	s=g_utf8_next_char(g_utf8_next_char(t));
   1.303 +	g_string_assign(pattern," ");
   1.304 +	g_string_append_unichar(pattern,single_quotes[i]);
   1.305 +	g_string_append_c(pattern,' ');
   1.306 +	s=aline;
   1.307 +	while ((t=strstr(s,pattern->str)))
   1.308 +	{
   1.309 +	    if (pswit[ECHO_SWITCH])
   1.310 +		g_print("\n%s\n",aline);
   1.311 +	    if (!pswit[OVERVIEW_SWITCH])
   1.312 +		g_print("    Line %ld column %ld - Spaced singlequote?\n",
   1.313 +		  linecnt,g_utf8_pointer_to_offset(aline,t)+1);
   1.314 +	    else
   1.315 +		cnt_punct++;
   1.316 +	    s=g_utf8_next_char(g_utf8_next_char(t));
   1.317 +	}
   1.318      }
   1.319 -    s=aline;
   1.320 -    while ((t=strstr(s," ` ")))
   1.321 -    {
   1.322 -	if (pswit[ECHO_SWITCH])
   1.323 -	    g_print("\n%s\n",aline);
   1.324 -	if (!pswit[OVERVIEW_SWITCH])
   1.325 -	    g_print("    Line %ld column %ld - Spaced singlequote?\n",
   1.326 -	      linecnt,g_utf8_pointer_to_offset(aline,t)+1);
   1.327 -	else
   1.328 -	    cnt_punct++;
   1.329 -	s=g_utf8_next_char(g_utf8_next_char(t));
   1.330 -    }
   1.331 +    g_string_free(pattern,TRUE);
   1.332  }
   1.333  
   1.334  /*
   1.335 @@ -2223,7 +2151,7 @@
   1.336  	pc=c;
   1.337  	c=nc;
   1.338  	nc=g_utf8_get_char(g_utf8_next_char(s));
   1.339 -	if (c==CHAR_SQUOTE && nc=='S' && g_unichar_islower(pc))
   1.340 +	if (CHAR_IS_APOSTROPHE(c) && nc=='S' && g_unichar_islower(pc))
   1.341  	{
   1.342  	    if (pswit[ECHO_SWITCH])
   1.343  		g_print("\n%s\n",aline);
   1.344 @@ -2255,8 +2183,7 @@
   1.345  	s=g_utf8_prev_char(aline+lbytes);
   1.346  	c1=g_utf8_get_char(s);
   1.347  	c2=g_utf8_get_char(g_utf8_prev_char(s));
   1.348 -	if ((c1==CHAR_DQUOTE || c1==CHAR_SQUOTE || c1==CHAR_OPEN_SQUOTE) &&
   1.349 -	  c2==CHAR_SPACE)
   1.350 +	if ((c1==CHAR_DQUOTE || CHAR_IS_SQUOTE(c1)) && c2==CHAR_SPACE)
   1.351  	{
   1.352  	    if (pswit[ECHO_SWITCH])
   1.353  		g_print("\n%s\n",aline);
   1.354 @@ -2268,7 +2195,7 @@
   1.355  	}
   1.356  	c1=g_utf8_get_char(aline);
   1.357  	c2=g_utf8_get_char(g_utf8_next_char(aline));
   1.358 -	if ((c1==CHAR_SQUOTE || c1==CHAR_OPEN_SQUOTE) && c2==CHAR_SPACE)
   1.359 +	if (CHAR_IS_SQUOTE(c1) && c2==CHAR_SPACE)
   1.360  	{
   1.361  	    if (pswit[ECHO_SWITCH])
   1.362  		g_print("\n%s\n",aline);
   1.363 @@ -2470,8 +2397,7 @@
   1.364      }
   1.365      if (pending->squote)
   1.366      {
   1.367 -	if (c!=CHAR_SQUOTE && c!=CHAR_OPEN_SQUOTE || pswit[QPARA_SWITCH] ||
   1.368 -	  pending->squot)
   1.369 +	if (!CHAR_IS_SQUOTE(c) || pswit[QPARA_SWITCH] || pending->squot)
   1.370  	{
   1.371  	    if (!pswit[OVERVIEW_SWITCH])
   1.372  	    {
   1.373 @@ -2558,28 +2484,39 @@
   1.374  void check_for_mismatched_quotes(const struct counters *counters,
   1.375    struct pending *pending)
   1.376  {
   1.377 +    int squote_straight,squote_curved;
   1.378      if (counters->quot%2)
   1.379  	pending->dquote=
   1.380  	  g_strdup_printf("    Line %ld - Mismatched quotes",linecnt);
   1.381 -    if (pswit[SQUOTE_SWITCH] && counters->open_single_quote &&
   1.382 -      counters->open_single_quote!=counters->close_single_quote)
   1.383 -	pending->squote=
   1.384 -	  g_strdup_printf("    Line %ld - Mismatched singlequotes?",linecnt);
   1.385 -    if (pswit[SQUOTE_SWITCH] && counters->open_single_quote &&
   1.386 -      counters->open_single_quote!=counters->close_single_quote &&
   1.387 -      counters->open_single_quote!=counters->close_single_quote+1)
   1.388 -	/*
   1.389 -	 * Flag it to be noted regardless of the
   1.390 -	 * first char of the next para.
   1.391 -	 */
   1.392 -	pending->squot=1;
   1.393 -    if (counters->r_brack)
   1.394 +    if (pswit[SQUOTE_SWITCH])
   1.395 +    {
   1.396 +	if (matching_count(counters,CHAR_SQUOTE,TRUE))
   1.397 +	    squote_straight=matching_difference(counters,CHAR_SQUOTE);
   1.398 +	else
   1.399 +	    squote_straight=0;
   1.400 +	if (matching_count(counters,CHAR_LS_QUOTE,TRUE))
   1.401 +	    squote_curved=matching_difference(counters,CHAR_LS_QUOTE);
   1.402 +	else
   1.403 +	    squote_curved=0;
   1.404 +	if (squote_straight || squote_curved)
   1.405 +	    pending->squote=
   1.406 +	      g_strdup_printf("    Line %ld - Mismatched singlequotes?",
   1.407 +	      linecnt);
   1.408 +	if (squote_straight && squote_straight!=1 ||
   1.409 +	  squote_curved && squote_curved!=1)
   1.410 +	    /*
   1.411 +	     * Flag it to be noted regardless of the
   1.412 +	     * first char of the next para.
   1.413 +	     */
   1.414 +	    pending->squot=1;
   1.415 +    }
   1.416 +    if (matching_difference(counters,CHAR_OPEN_RBRACK))
   1.417  	pending->rbrack=
   1.418  	  g_strdup_printf("    Line %ld - Mismatched round brackets?",linecnt);
   1.419 -    if (counters->s_brack)
   1.420 +    if (matching_difference(counters,CHAR_OPEN_SBRACK))
   1.421  	pending->sbrack=
   1.422  	  g_strdup_printf("    Line %ld - Mismatched square brackets?",linecnt);
   1.423 -    if (counters->c_brack)
   1.424 +    if (matching_difference(counters,CHAR_OPEN_CBRACK))
   1.425  	pending->cbrack=
   1.426  	  g_strdup_printf("    Line %ld - Mismatched curly brackets?",linecnt);
   1.427      if (counters->c_unders%2)
   1.428 @@ -2603,6 +2540,7 @@
   1.429  {
   1.430      gboolean letter_on_line=FALSE;
   1.431      const char *s;
   1.432 +    gunichar c;
   1.433      for (s=prevline;*s;s=g_utf8_next_char(s))
   1.434  	if (g_unichar_isalpha(g_utf8_get_char(s)))
   1.435  	{
   1.436 @@ -2619,12 +2557,12 @@
   1.437      if (letter_on_line && last->blen>2 && start_para_line<linecnt-1 &&
   1.438        g_utf8_get_char(prevline)>CHAR_SPACE)
   1.439      {
   1.440 -	for (s=g_utf8_prev_char(prevline+strlen(prevline));
   1.441 -	  (g_utf8_get_char(s)==CHAR_DQUOTE ||
   1.442 -	  g_utf8_get_char(s)==CHAR_SQUOTE) &&
   1.443 -	  g_utf8_get_char(s)>CHAR_SPACE && s>prevline;
   1.444 -	  s=g_utf8_prev_char(s))
   1.445 -	    ;
   1.446 +	s=prevline+strlen(prevline);
   1.447 +	do
   1.448 +	{
   1.449 +	    s=g_utf8_prev_char(s);
   1.450 +	    c=g_utf8_get_char(s);
   1.451 +	} while (CHAR_IS_CLOSING_QUOTE(c) && c>CHAR_SPACE && s>prevline);
   1.452  	for (;s>prevline;s=g_utf8_prev_char(s))
   1.453  	{
   1.454  	    if (g_unichar_isalpha(g_utf8_get_char(s)))
   1.455 @@ -2857,6 +2795,7 @@
   1.456  	g_tree_foreach(qword,report_duplicate_queries,NULL);
   1.457      g_tree_unref(qword);
   1.458      g_tree_unref(qperiod);
   1.459 +    counters_destroy(&counters);
   1.460      g_set_print_handler(NULL);
   1.461      print_as_windows_1252(NULL);
   1.462      if (pswit[MARKUP_SWITCH])  
   1.463 @@ -3066,10 +3005,10 @@
   1.464      }
   1.465      /* we didn't find a punctuated number - do the regular getword thing */
   1.466      g_string_truncate(word,0);
   1.467 -    for (;g_unichar_isdigit(g_utf8_get_char(*ptr)) ||
   1.468 -      g_unichar_isalpha(g_utf8_get_char(*ptr)) ||
   1.469 -      g_utf8_get_char(*ptr)=='\'';*ptr=g_utf8_next_char(*ptr))
   1.470 -	g_string_append_unichar(word,g_utf8_get_char(*ptr));
   1.471 +    c=g_utf8_get_char(*ptr);
   1.472 +    for (;g_unichar_isdigit(c) || g_unichar_isalpha(c) || CHAR_IS_APOSTROPHE(c);
   1.473 +      *ptr=g_utf8_next_char(*ptr),c=g_utf8_get_char(*ptr))
   1.474 +	g_string_append_unichar(word,c);
   1.475      return g_string_free(word,FALSE);
   1.476  }
   1.477