# HG changeset patch
# User ali <ali@juiblex.co.uk>
# Date 1369895484 -3600
# Node ID aa916da2e452f74c1042749418904d659ed89693
# Parent  1016349e619fe15277a41597cf9b8811c5411b25
Switch to using UTF-8 internally

diff -r 1016349e619f -r aa916da2e452 bookloupe/bookloupe.c
--- a/bookloupe/bookloupe.c	Tue May 28 15:17:19 2013 +0100
+++ b/bookloupe/bookloupe.c	Thu May 30 07:31:24 2013 +0100
@@ -119,8 +119,6 @@
     "among", "those", "into", "whom", "having", "thence", ""
 }; 
 
-char vowels[] = "aeiouàáâãäæèéêëìíîïòóôõöùúûü";
-
 struct {
     char *htmlent;
     char *htmlnum;
@@ -347,16 +345,13 @@
 
 gchar *running_from;
 
-int mixdigit(const char *);
+gboolean mixdigit(const char *);
 gchar *getaword(const char **);
 char *flgets(char **,long);
-gboolean gcisalpha(unsigned char);
-gboolean gcisdigit(unsigned char);
-gboolean gcisletter(unsigned char);
 void postprocess_for_HTML(char *);
 char *linehasmarkup(char *);
 char *losemarkup(char *);
-int tagcomp(const char *,const char *);
+gboolean tagcomp(const char *,const char *);
 char *loseentities(char *);
 gboolean isroman(const char *);
 void postprocess_for_DP(char *);
@@ -385,7 +380,7 @@
 
 struct line_properties {
     unsigned int len,blen;
-    char start;
+    gunichar start;
 };
 
 struct parities {
@@ -462,8 +457,8 @@
     gchar *usertypo_file;
     gboolean okay;
     int i;
-    gsize len;
-    gchar *contents,**lines;
+    gsize len,nb;
+    gchar *contents,*utf8,**lines;
     usertypo_file=g_strdup("bookloupe.typ");
     okay=file_get_contents_text(usertypo_file,&contents,&len,&err);
     if (g_error_matches(err,G_FILE_ERROR,G_FILE_ERROR_NOENT))
@@ -490,7 +485,7 @@
     if (g_error_matches(err,G_FILE_ERROR,G_FILE_ERROR_NOENT))
     {
 	g_free(usertypo_file);
-	printf("   --> I couldn't find bookloupe.typ "
+	g_print("   --> I couldn't find bookloupe.typ "
 	  "-- proceeding without user typos.\n");
 	return;
     }
@@ -501,7 +496,10 @@
 	g_clear_error(&err);
 	exit(1);
     }
-    lines=g_strsplit(contents,"\n",0);
+    utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,NULL);
+    g_free(contents);
+    lines=g_strsplit_set(utf8,"\r\n",0);
+    g_free(utf8);
     usertypo=g_tree_new_full((GCompareDataFunc)strcmp,NULL,g_free,NULL);
     for (i=0;lines[i];i++)
 	if (*(unsigned char *)lines[i]>'!')
@@ -511,49 +509,6 @@
     g_free(lines);
 }
 
-#if 0
-/*
- * read_etext:
- *
- * Read an etext returning an array of lines. Lines are normally expected
- * to be terminated by CR LF. Solitary LFs delimit lines but are left
- * embedded at the end of the line for further processing. Solitary CRs
- * do not delimit lines.
- */
-gchar **read_etext(const char *filename,GError **err)
-{
-    int i;
-    const char *s,*t;
-    gchar *contents;
-    gchar **raw_lines;
-    GPtrArray *lines;
-    gsize len;
-    if (!g_file_get_contents(filename,&contents,&len,err))
-	return NULL;
-    raw_lines=g_strsplit(contents,"\r\n",0);
-    lines=g_ptr_array_sized_new(g_strv_length(raw_lines)+1);
-    for (i=0;raw_lines[i];i++)
-    {
-	t=strchr(raw_lines[i],'\n');
-	if (t)
-	{
-	    s=raw_lines[i];
-	    while ((t=strchr(s,'\n')))
-	    {
-		g_ptr_array_add(lines,g_strndup(s,t-s+1));
-		s=t+1;
-	    }
-	    g_ptr_array_add(lines,g_strdup(s));
-	    g_free(raw_lines[i]);
-	}
-	else
-	    g_ptr_array_add(lines,raw_lines[i]);
-    }
-    g_free(raw_lines);
-    g_ptr_array_add(lines,NULL);
-    return (gchar **)g_ptr_array_free(lines,FALSE);
-}
-#else
 /*
  * read_etext:
  *
@@ -562,13 +517,14 @@
  */
 gchar *read_etext(const char *filename,GError **err)
 {
-    gchar *contents;
-    gsize len;
+    gchar *contents,*utf8;
+    gsize len,nb;
     if (!g_file_get_contents(filename,&contents,&len,err))
 	return NULL;
-    return contents;
+    utf8=g_convert(contents,len,"UTF-8","WINDOWS-1252",NULL,&nb,err);
+    g_free(contents);
+    return utf8;	/* NULL, with err filled in, if the conversion failed */
 }
-#endif
 
 int main(int argc,char **argv)
 {
@@ -580,35 +536,35 @@
     procfile(argv[1]);
     if (pswit[OVERVIEW_SWITCH])
     {
-	printf("    Checked %ld lines of %ld (head+foot = %ld)\n\n",
+	g_print("    Checked %ld lines of %ld (head+foot = %ld)\n\n",
 	  checked_linecnt,linecnt,linecnt-checked_linecnt);
-	printf("    --------------- Queries found --------------\n");
+	g_print("    --------------- Queries found --------------\n");
 	if (cnt_long)
-	    printf("    Long lines:		    %14ld\n",cnt_long);
+	    g_print("    Long lines:		    %14ld\n",cnt_long);
 	if (cnt_short)
-	    printf("    Short lines:		   %14ld\n",cnt_short);
+	    g_print("    Short lines:		   %14ld\n",cnt_short);
 	if (cnt_lineend)
-	    printf("    Line-end problems:	     %14ld\n",cnt_lineend);
+	    g_print("    Line-end problems:	     %14ld\n",cnt_lineend);
 	if (cnt_word)
-	    printf("    Common typos:		  %14ld\n",cnt_word);
+	    g_print("    Common typos:		  %14ld\n",cnt_word);
 	if (cnt_dquot)
-	    printf("    Unmatched quotes:	      %14ld\n",cnt_dquot);
+	    g_print("    Unmatched quotes:	      %14ld\n",cnt_dquot);
 	if (cnt_squot)
-	    printf("    Unmatched SingleQuotes:	%14ld\n",cnt_squot);
+	    g_print("    Unmatched SingleQuotes:	%14ld\n",cnt_squot);
 	if (cnt_brack)
-	    printf("    Unmatched brackets:	    %14ld\n",cnt_brack);
+	    g_print("    Unmatched brackets:	    %14ld\n",cnt_brack);
 	if (cnt_bin)
-	    printf("    Non-ASCII characters:	  %14ld\n",cnt_bin);
+	    g_print("    Non-ASCII characters:	  %14ld\n",cnt_bin);
 	if (cnt_odd)
-	    printf("    Proofing characters:	   %14ld\n",cnt_odd);
+	    g_print("    Proofing characters:	   %14ld\n",cnt_odd);
 	if (cnt_punct)
-	    printf("    Punctuation & spacing queries: %14ld\n",cnt_punct);
+	    g_print("    Punctuation & spacing queries: %14ld\n",cnt_punct);
 	if (cnt_dash)
-	    printf("    Non-standard dashes:	   %14ld\n",cnt_dash);
+	    g_print("    Non-standard dashes:	   %14ld\n",cnt_dash);
 	if (cnt_html)
-	    printf("    Possible HTML tags:	    %14ld\n",cnt_html);
-	printf("\n");
-	printf("    TOTAL QUERIES		  %14ld\n",
+	    g_print("    Possible HTML tags:	    %14ld\n",cnt_html);
+	g_print("\n");
+	g_print("    TOTAL QUERIES		  %14ld\n",
 	  cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+
 	  cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);
     }
@@ -628,10 +584,10 @@
  */
 struct first_pass_results *first_pass(const char *etext)
 {
-    char laststart=CHAR_SPACE;
+    gunichar laststart=CHAR_SPACE;
     const char *s;
     gchar *lc_line;
-    int i,j,llen;
+    int i,j,lbytes,llen;
     gchar **lines;
     unsigned int lastlen=0,lastblen=0;
     long spline=0,nspline=0;
@@ -640,27 +596,28 @@
     lines=g_strsplit(etext,"\n",0);
     for (j=0;lines[j];j++)
     {
-	llen=strlen(lines[j]);
-	while(lines[j][llen-1]=='\r')
-	    lines[j][llen--]='\0';
+	lbytes=strlen(lines[j]);
+	while (lbytes>0 && lines[j][lbytes-1]=='\r')  /* empty line: no [-1] */
+	    lines[j][--lbytes]='\0';
+	llen=g_utf8_strlen(lines[j],lbytes);
 	linecnt++;
 	if (strstr(lines[j],"*END") && strstr(lines[j],"SMALL PRINT") &&
 	  (strstr(lines[j],"PUBLIC DOMAIN") || strstr(lines[j],"COPYRIGHT")))
 	{
 	    if (spline)
-		printf("   --> Duplicate header?\n");
+		g_print("   --> Duplicate header?\n");
 	    spline=linecnt+1;   /* first line of non-header text, that is */
 	}
 	if (!strncmp(lines[j],"*** START",9) &&
 	  strstr(lines[j],"PROJECT GUTENBERG"))
 	{
 	    if (nspline)
-		printf("   --> Duplicate header?\n");
+		g_print("   --> Duplicate header?\n");
 	    nspline=linecnt+1;   /* first line of non-header text, that is */
 	}
 	if (spline || nspline)
 	{
-	    lc_line=g_ascii_strdown(lines[j],llen);
+	    lc_line=g_utf8_strdown(lines[j],lbytes);
 	    if (strstr(lc_line,"end") && strstr(lc_line,"project gutenberg"))
 	    {
 		if (strstr(lc_line,"end")<strstr(lc_line,"project gutenberg"))
@@ -669,7 +626,7 @@
 		    {
 			/* it's an old-form header - we can detect duplicates */
 			if (!nspline)
-			    printf("   --> Duplicate footer?\n");
+			    g_print("   --> Duplicate footer?\n");
 		    }
 		    else
 			results.footerline=linecnt;
@@ -684,19 +641,21 @@
 	if (results.footerline)
 	    continue;    /* don't count the boilerplate in the footer */
 	results.totlen+=llen;
-	for (i=0;i<llen;i++)
+	for (s=lines[j];*s;s=g_utf8_next_char(s))
 	{
-	    if ((unsigned char)lines[j][i]>127)
+	    if (g_utf8_get_char(s)>127)
 		results.binlen++;
-	    if (gcisalpha(lines[j][i]))
+	    if (g_unichar_isalpha(g_utf8_get_char(s)))
 		results.alphalen++;
-	    if (i>0 && lines[j][i]==CHAR_DQUOTE && isalpha(lines[j][i-1]))
+	    if (s>lines[j] && g_utf8_get_char(s)==CHAR_DQUOTE &&
+	      g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(s))))
 		results.endquote_count++;
 	}
 	if (llen>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE && lastblen>2 &&
 	  lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)
 	    results.shortline++;
-	if (llen>0 && (unsigned char)lines[j][llen-1]<=CHAR_SPACE)
+	if (lbytes>0 &&
+	  g_utf8_get_char(g_utf8_prev_char(lines[j]+lbytes))<=CHAR_SPACE)
 	    cnt_spacend++;
 	if (strstr(lines[j],".,"))
 	    results.dotcomma++;
@@ -704,17 +663,19 @@
 	/* locase text on the line */
 	if (strchr(lines[j],'*'))
 	{
-	    for (s=lines[j];*s;s++)
-		if (*s>='a' && *s<='z')
+	    for (s=lines[j];*s;s=g_utf8_next_char(s))
+		if (g_unichar_islower(g_utf8_get_char(s)))
 		    break;
-	     if (*s)
+	    if (*s)
 		results.astline++;
 	}
 	if (strchr(lines[j],'/'))
 	    results.fslashline++;
-	for (i=llen-1;i>0 && (unsigned char)lines[j][i]<=CHAR_SPACE;i--)
+	for (s=lbytes>0?g_utf8_prev_char(lines[j]+lbytes):lines[j];
+	  s>lines[j] && g_utf8_get_char(s)<=CHAR_SPACE;s=g_utf8_prev_char(s))
 	    ;
-	if (i>1 && lines[j][i]=='-' && lines[j][i-1]!='-')
+	if (s>g_utf8_next_char(lines[j]) && g_utf8_get_char(s)=='-' &&
+	  g_utf8_get_char(g_utf8_prev_char(s))!='-')
 	    results.hyphens++;
 	if (llen>LONGEST_PG_LINE)
 	    results.longline++;
@@ -729,15 +690,15 @@
 		results.htmcount+=4; /* bonus marks! */
 	}
 	/* Check for spaced em-dashes */
-	if (lines[j][0] && (s=strstr(lines[j]+1,"--")))
+	if (lines[j][0] && (s=strstr(g_utf8_next_char(lines[j]),"--")))
 	{
 	    results.emdash++;
-	    if (s[-1]==CHAR_SPACE || (s[2]==CHAR_SPACE))
+	    if (s[-1]==CHAR_SPACE || s[2]==CHAR_SPACE)
 		results.space_emdash++;
-	    if (s[-1]==CHAR_SPACE && (s[2]==CHAR_SPACE))
+	    if (s[-1]==CHAR_SPACE && s[2]==CHAR_SPACE)
 		/* count of em-dashes with spaces both sides */
 		results.non_PG_space_emdash++;
-	    if (s[-1]!=CHAR_SPACE && (s[2]!=CHAR_SPACE))
+	    if (s[-1]!=CHAR_SPACE && s[2]!=CHAR_SPACE)
 		/* count of PG-type em-dashes with no spaces */
 		results.PG_space_emdash++;
 	}
@@ -772,13 +733,13 @@
 {
     static struct warnings warnings={0};
     if (cnt_spacend>0)
-	printf("   --> %ld lines in this file have white space at end\n",
+	g_print("   --> %ld lines in this file have white space at end\n",
 	  cnt_spacend);
     warnings.dotcomma=1;
     if (results->dotcomma>5)
     {
 	warnings.dotcomma=0;
-	printf("   --> %ld lines in this file contain '.,'. "
+	g_print("   --> %ld lines in this file contain '.,'. "
 	  "Not reporting them.\n",results->dotcomma);
     }
     /*
@@ -789,7 +750,7 @@
     if (results->shortline>50 || results->shortline*10>linecnt)
     {
 	warnings.shortline=0;
-	printf("   --> %ld lines in this file are short. "
+	g_print("   --> %ld lines in this file are short. "
 	  "Not reporting short lines.\n",results->shortline);
     }
     /*
@@ -800,7 +761,7 @@
     if (results->longline>50 || results->longline*10>linecnt)
     {
 	warnings.longline=0;
-	printf("   --> %ld lines in this file are long. "
+	g_print("   --> %ld lines in this file are long. "
 	  "Not reporting long lines.\n",results->longline);
     }
     /* If more than 10 lines contain asterisks, don't bother reporting them. */
@@ -808,7 +769,7 @@
     if (results->astline>10)
     {
 	warnings.ast=0;
-	printf("   --> %ld lines in this file contain asterisks. "
+	g_print("   --> %ld lines in this file contain asterisks. "
 	  "Not reporting them.\n",results->astline);
     }
     /*
@@ -819,7 +780,7 @@
     if (results->fslashline>10)
     {
 	warnings.fslash=0;
-	printf("   --> %ld lines in this file contain forward slashes. "
+	g_print("   --> %ld lines in this file contain forward slashes. "
 	  "Not reporting them.\n",results->fslashline);
     }
     /*
@@ -830,7 +791,7 @@
     if (results->endquote_count>20)
     {
 	warnings.endquote=0;
-	printf("   --> %ld lines in this file contain unpunctuated endquotes. "
+	g_print("   --> %ld lines in this file contain unpunctuated endquotes. "
 	  "Not reporting them.\n",results->endquote_count);
     }
     /*
@@ -841,7 +802,7 @@
     if (results->standalone_digit>10)
     {
 	warnings.digit=0;
-	printf("   --> %ld lines in this file contain standalone 0s and 1s. "
+	g_print("   --> %ld lines in this file contain standalone 0s and 1s. "
 	  "Not reporting them.\n",results->standalone_digit);
     }
     /*
@@ -852,16 +813,16 @@
     if (results->hyphens>20)
     {
 	warnings.hyphen=0;
-	printf("   --> %ld lines in this file have hyphens at end. "
+	g_print("   --> %ld lines in this file have hyphens at end. "
 	  "Not reporting them.\n",results->hyphens);
     }
     if (results->htmcount>20 && !pswit[MARKUP_SWITCH])
     {
-	printf("   --> Looks like this is HTML. Switching HTML mode ON.\n");
+	g_print("   --> Looks like this is HTML. Switching HTML mode ON.\n");
 	pswit[MARKUP_SWITCH]=1;
     }
     if (results->verylongline>0)
-	printf("   --> %ld lines in this file are VERY long!\n",
+	g_print("   --> %ld lines in this file are VERY long!\n",
 	  results->verylongline);
     /*
      * If there are more non-PG spaced dashes than PG em-dashes,
@@ -874,7 +835,7 @@
       results->PG_space_emdash)
     {
 	warnings.dash=0;
-	printf("   --> There are %ld spaced dashes and em-dashes. "
+	g_print("   --> There are %ld spaced dashes and em-dashes. "
 	  "Not reporting them.\n",
 	  results->spacedash+results->non_PG_space_emdash);
     }
@@ -882,19 +843,19 @@
     warnings.bin=1;
     if (results->binlen*4>results->totlen)
     {
-	printf("   --> This file does not appear to be ASCII. "
+	g_print("   --> This file does not appear to be ASCII. "
 	  "Terminating. Best of luck with it!\n");
 	exit(1);
     }
     if (results->alphalen*4<results->totlen)
     {
-	printf("   --> This file does not appear to be text. "
+	g_print("   --> This file does not appear to be text. "
 	  "Terminating. Best of luck with it!\n");
 	exit(1);
     }
     if (results->binlen*100>results->totlen || results->binlen>100)
     {
-	printf("   --> There are a lot of foreign letters here. "
+	g_print("   --> There are a lot of foreign letters here. "
 	  "Not reporting them.\n");
 	warnings.bin=0;
     }
@@ -902,26 +863,26 @@
     if (results->Dutchcount>50)
     {
 	warnings.isDutch=TRUE;
-	printf("   --> This looks like Dutch - "
+	g_print("   --> This looks like Dutch - "
 	  "switching off dashes and warnings for 's Middags case.\n");
     }
     warnings.isFrench=FALSE;
     if (results->Frenchcount>50)
     {
 	warnings.isFrench=TRUE;
-	printf("   --> This looks like French - "
+	g_print("   --> This looks like French - "
 	  "switching off some doublepunct.\n");
     }
     if (results->firstline && results->footerline)
-	printf("    The PG header and footer appear to be already on.\n");
+	g_print("    The PG header and footer appear to be already on.\n");
     else
     {
 	if (results->firstline)
-	    printf("    The PG header is on - no footer.\n");
+	    g_print("    The PG header is on - no footer.\n");
 	if (results->footerline)
-	    printf("    The PG footer is on - no header.\n");
+	    g_print("    The PG footer is on - no header.\n");
     }
-    printf("\n");
+    g_print("\n");
     if (pswit[VERBOSE_SWITCH])
     {
 	warnings.bin=1;
@@ -934,7 +895,7 @@
 	warnings.fslash=1;
 	warnings.hyphen=1;
 	warnings.endquote=1;
-	printf("   *** Verbose output is ON -- you asked for it! ***\n");
+	g_print("   *** Verbose output is ON -- you asked for it! ***\n");
     }
     if (warnings.isDutch)
 	warnings.dash=0;
@@ -942,9 +903,9 @@
       results->footerline>results->firstline &&
       results->footerline-results->firstline<100)
     {
-	printf("   --> I don't really know where this text starts. \n");
-	printf("       There are no reference points.\n");
-	printf("       I'm going to have to report the header and footer "
+	g_print("   --> I don't really know where this text starts. \n");
+	g_print("       There are no reference points.\n");
+	g_print("       I'm going to have to report the header and footer "
 	  "as well.\n");
 	results->firstline=0;
     }
@@ -968,12 +929,16 @@
     int guessquote=0;
     /* assume the line is empty until proven otherwise */
     gboolean isemptyline=TRUE;
-    const char *s=aline;
+    const char *s=aline,*sprev,*snext;
+    gunichar c;
+    sprev=NULL;
     while (*s)
     {
-	if (*s==CHAR_DQUOTE)
+	snext=g_utf8_next_char(s);
+	c=g_utf8_get_char(s);
+	if (c==CHAR_DQUOTE)
 	    counters->quot++;
-	if (*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE)
+	if (c==CHAR_SQUOTE || c==CHAR_OPEN_SQUOTE)
 	{
 	    if (s==aline)
 	    {
@@ -981,17 +946,21 @@
 		 * At start of line, it can only be an openquote.
 		 * Hardcode a very common exception!
 		 */
-		if (strncmp(s+2,"tis",3) && strncmp(s+2,"Tis",3))
+		if (!g_str_has_prefix(snext,"tis") &&
+		  !g_str_has_prefix(snext,"Tis"))
 		    counters->open_single_quote++;
 	    }
-	    else if (gcisalpha(s[-1]) && gcisalpha(s[1]))
+	    else if (g_unichar_isalpha(g_utf8_get_char(sprev)) &&
+	      g_unichar_isalpha(g_utf8_get_char(snext)))
 		/* Do nothing! it's definitely an apostrophe, not a quote */
 		;
 	    /* it's outside a word - let's check it out */
-	    else if (*s==CHAR_OPEN_SQUOTE || gcisalpha(s[1]))
+	    else if (c==CHAR_OPEN_SQUOTE ||
+	      g_unichar_isalpha(g_utf8_get_char(snext)))
 	    {
 		/* it damwell better BE an openquote */
-		if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3))
+		if (!g_str_has_prefix(snext,"tis") &&
+		  !g_str_has_prefix(snext,"Tis"))
 		    /* hardcode a very common exception! */
 		    counters->open_single_quote++;
 	    }
@@ -999,20 +968,22 @@
 	    {
 		/* now - is it a closequote? */
 		guessquote=0;   /* accumulate clues */
-		if (gcisalpha(s[-1]))
+		if (g_unichar_isalpha(g_utf8_get_char(sprev)))
 		{
 		    /* it follows a letter - could be either */
 		    guessquote++;
-		    if (s[-1]=='s')
+		    if (g_utf8_get_char(sprev)=='s')
 		    {
 			/* looks like a plural apostrophe */
 			guessquote-=3;
-			if (s[1]==CHAR_SPACE)  /* bonus marks! */
+			if (g_utf8_get_char(snext)==CHAR_SPACE)
+			    /* bonus marks! */
 			    guessquote-=2;
 		    }
 		}
 		/* it doesn't have a letter either side */
-		else if (strchr(".?!,;:",s[-1]) && strchr(".?!,;: ",s[1]))
+		else if (g_utf8_strchr(".?!,;:",-1,g_utf8_get_char(sprev)) &&
+		  g_utf8_strchr(".?!,;: ",-1,g_utf8_get_char(snext)))
 		    guessquote+=8; /* looks like a closequote */
 		else
 		    guessquote++;
@@ -1028,24 +999,25 @@
 		    counters->close_single_quote++;
 	    }
 	}
-	if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK &&
-	  *s!=13 && *s!=10)
+	if (c!=CHAR_SPACE && c!='-' && c!='.' && c!=CHAR_ASTERISK &&
+	  c!='\r' && c!='\n')
 	    isemptyline=FALSE;  /* ignore lines like  *  *  *  as spacers */
-	if (*s==CHAR_UNDERSCORE)
+	if (c==CHAR_UNDERSCORE)
 	    counters->c_unders++;
-	if (*s==CHAR_OPEN_CBRACK)
+	if (c==CHAR_OPEN_CBRACK)
 	    counters->c_brack++;
-	if (*s==CHAR_CLOSE_CBRACK)
+	if (c==CHAR_CLOSE_CBRACK)
 	    counters->c_brack--;
-	if (*s==CHAR_OPEN_RBRACK)
+	if (c==CHAR_OPEN_RBRACK)
 	    counters->r_brack++;
-	if (*s==CHAR_CLOSE_RBRACK)
+	if (c==CHAR_CLOSE_RBRACK)
 	    counters->r_brack--;
-	if (*s==CHAR_OPEN_SBRACK)
+	if (c==CHAR_OPEN_SBRACK)
 	    counters->s_brack++;
-	if (*s==CHAR_CLOSE_SBRACK)
+	if (c==CHAR_CLOSE_SBRACK)
 	    counters->s_brack--;
-	s++;
+	sprev=s;
+	s=snext;
     }
     return isemptyline;
 }
@@ -1060,18 +1032,18 @@
  */
 void check_for_control_characters(const char *aline)
 {
-    unsigned char c;
+    gunichar c;
     const char *s;
-    for (s=aline;*s;s++)
+    for (s=aline;*s;s=g_utf8_next_char(s))
     {
-	c=*(unsigned char *)s;
+	c=g_utf8_get_char(s);
 	if (c<CHAR_SPACE && c!=CHAR_LF && c!=CHAR_CR && c!=CHAR_TAB)
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - Control character %d\n",
-		  linecnt,(int)(s-aline)+1,c);
+		g_print("    Line %ld column %ld - Control character %u\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
 	    else
 		cnt_bin++;
 	}
@@ -1087,90 +1059,93 @@
   gboolean isemptyline)
 {
     /* Don't repeat multiple warnings on one line. */
-    int eNon_A=0,eTab=0,eTilde=0,eCarat=0,eFSlash=0,eAst=0;
+    gboolean eNon_A=FALSE,eTab=FALSE,eTilde=FALSE;
+    gboolean eCarat=FALSE,eFSlash=FALSE,eAst=FALSE;
     const char *s;
-    unsigned char c;
-    for (s=aline;*s;s++)
+    gunichar c;
+    for (s=aline;*s;s=g_utf8_next_char(s))
     {
-	c=*(unsigned char *)s;
-	if (!eNon_A && (*s<CHAR_SPACE && *s!=9 && *s!='\n' || c>127))
+	c=g_utf8_get_char(s);
+	if (!eNon_A && (c<CHAR_SPACE && c!='\t' && c!='\n' || c>127))
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		if (c>127 && c<160)
-		    printf("    Line %ld column %d - "
-		      "Non-ISO-8859 character %d\n",linecnt,(int)(s-aline)+1,c);
+		if (c>127 && c<160 || c>255)
+		    g_print("    Line %ld column %ld - "
+		      "Non-ISO-8859 character %u\n",
+		      linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
 		else
-		    printf("    Line %ld column %d - Non-ASCII character %d\n",
-		      linecnt,(int)(s-aline)+1,c);
+		    g_print("    Line %ld column %ld - "
+		      "Non-ASCII character %u\n",
+		      linecnt,g_utf8_pointer_to_offset(aline,s)+1,c);
 	    else
 		cnt_bin++;
-	    eNon_A=1;
+	    eNon_A=TRUE;
 	}
-	if (!eTab && *s==CHAR_TAB)
+	if (!eTab && c==CHAR_TAB)
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - Tab character?\n",
-		  linecnt,(int)(s-aline)+1);
+		g_print("    Line %ld column %ld - Tab character?\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 	    else
 		cnt_odd++;
-	    eTab=1;
+	    eTab=TRUE;
 	}
-	if (!eTilde && *s==CHAR_TILDE)
+	if (!eTilde && c==CHAR_TILDE)
 	{
 	    /*
 	     * Often used by OCR software to indicate an
 	     * unrecognizable character.
 	     */
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - Tilde character?\n",
-		  linecnt,(int)(s-aline)+1);
+		g_print("    Line %ld column %ld - Tilde character?\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 	    else
 		cnt_odd++;
-	    eTilde=1;
+	    eTilde=TRUE;
 	}
-	if (!eCarat && *s==CHAR_CARAT)
+	if (!eCarat && c==CHAR_CARAT)
 	{  
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - Carat character?\n",
-		  linecnt,(int)(s-aline)+1);
+		g_print("    Line %ld column %ld - Carat character?\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 	    else
 		cnt_odd++;
-	    eCarat=1;
+	    eCarat=TRUE;
 	}
-	if (!eFSlash && *s==CHAR_FORESLASH && warnings->fslash)
+	if (!eFSlash && c==CHAR_FORESLASH && warnings->fslash)
 	{  
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - Forward slash?\n",
-		  linecnt,(int)(s-aline)+1);
+		g_print("    Line %ld column %ld - Forward slash?\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 	    else
 		cnt_odd++;
-	    eFSlash=1;
+	    eFSlash=TRUE;
 	}
 	/*
 	 * Report asterisks only in paranoid mode,
 	 * since they're often deliberate.
 	 */
 	if (!eAst && pswit[PARANOID_SWITCH] && warnings->ast && !isemptyline &&
-	  *s==CHAR_ASTERISK)
+	  c==CHAR_ASTERISK)
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - Asterisk?\n",
-		  linecnt,(int)(s-aline)+1);
+		g_print("    Line %ld column %ld - Asterisk?\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 	    else
 		cnt_odd++;
-	    eAst=1;
+	    eAst=TRUE;
 	}
     }
 }
@@ -1182,13 +1157,13 @@
  */
 void check_for_long_line(const char *aline)
 {
-    if (strlen(aline)>LONGEST_PG_LINE)
+    if (g_utf8_strlen(aline,-1)>LONGEST_PG_LINE)
     {
 	if (pswit[ECHO_SWITCH])
-	    printf("\n%s\n",aline);
+	    g_print("\n%s\n",aline);
 	if (!pswit[OVERVIEW_SWITCH])
-	    printf("    Line %ld column %d - Long line %d\n",
-	      linecnt,(int)strlen(aline),(int)strlen(aline));
+	    g_print("    Line %ld column %ld - Long line %ld\n",
+	      linecnt,g_utf8_strlen(aline,-1),g_utf8_strlen(aline,-1));
 	else
 	    cnt_long++;
     }
@@ -1220,14 +1195,15 @@
  */
 void check_for_short_line(const char *aline,const struct line_properties *last)
 {
-    if (strlen(aline)>1 && last->len>1 && last->len<SHORTEST_PG_LINE &&
-      last->blen>1 && last->blen>SHORTEST_PG_LINE && last->start!=CHAR_SPACE)
+    if (g_utf8_strlen(aline,-1)>1 && last->len>1 &&
+      last->len<SHORTEST_PG_LINE && last->blen>1 &&
+      last->blen>SHORTEST_PG_LINE && last->start!=CHAR_SPACE)
     {
 	if (pswit[ECHO_SWITCH])
-	    printf("\n%s\n",prevline);
+	    g_print("\n%s\n",prevline);
 	if (!pswit[OVERVIEW_SWITCH])
-	    printf("    Line %ld column %d - Short line %d?\n",
-	      linecnt-1,(int)strlen(prevline),(int)strlen(prevline));
+	    g_print("    Line %ld column %ld - Short line %ld?\n",
+	      linecnt-1,g_utf8_strlen(prevline,-1),g_utf8_strlen(prevline,-1));
 	else
 	    cnt_short++;
     }
@@ -1240,12 +1216,13 @@
  */
 void check_for_starting_punctuation(const char *aline)
 {
-    if (*aline && strchr(".?!,;:",aline[0]) && strncmp(". . .",aline,5))
+    if (*aline && g_utf8_strchr(".?!,;:",-1,g_utf8_get_char(aline)) &&
+      !g_str_has_prefix(aline,". . ."))
     {
 	if (pswit[ECHO_SWITCH])
-	    printf("\n%s\n",aline);
+	    g_print("\n%s\n",aline);
 	if (!pswit[OVERVIEW_SWITCH])
-	    printf("    Line %ld column 1 - Begins with punctuation?\n",
+	    g_print("    Line %ld column 1 - Begins with punctuation?\n",
 	      linecnt);
 	else
 	    cnt_punct++;
@@ -1263,21 +1240,21 @@
  */
 void check_for_spaced_emdash(const char *aline)
 {
-    const char *s,*t;
-    s=aline;
-    while ((t=strstr(s,"--")))
+    const char *s,*t,*next;
+    for (s=aline;t=strstr(s,"--");s=next)
     {
-	if (t>aline && t[-1]==CHAR_SPACE || t[2]==CHAR_SPACE)
+	next=g_utf8_next_char(g_utf8_next_char(t));
+	if (t>aline && g_utf8_get_char(g_utf8_prev_char(t))==CHAR_SPACE ||
+	  g_utf8_get_char(next)==CHAR_SPACE)
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - Spaced em-dash?\n",
-		  linecnt,(int)(t-aline)+1);
+		g_print("    Line %ld column %ld - Spaced em-dash?\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,t)+1);
 	    else
 		cnt_dash++;
 	}
-	s=t+2;
     }
 }
 
@@ -1291,26 +1268,26 @@
     const char *s;
     if ((s=strstr(aline," -")))
     {
-	if (s[2]!='-')
+	if (g_utf8_get_char(g_utf8_next_char(g_utf8_next_char(s)))!='-')
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - Spaced dash?\n",
-		  linecnt,(int)(s-aline)+1);
+		g_print("    Line %ld column %ld - Spaced dash?\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 	    else
 		cnt_dash++;
 	}
     }
     else if ((s=strstr(aline,"- ")))
     {
-	if (s==aline || s[-1]!='-')
+	if (s==aline || g_utf8_get_char(g_utf8_prev_char(s))!='-')
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - Spaced dash?\n",
-		  linecnt,(int)(s-aline)+1);
+		g_print("    Line %ld column %ld - Spaced dash?\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 	    else
 		cnt_dash++;
 	}
@@ -1335,10 +1312,11 @@
     if (s)
     {
 	if (pswit[ECHO_SWITCH])
-	    printf("\n%s\n",aline);
+	    g_print("\n%s\n",aline);
 	if (!pswit[OVERVIEW_SWITCH])
-	    printf("    Line %ld column %d - Query missing paragraph break?\n",
-	      linecnt,(int)(s-aline)+1);
+	    g_print("    Line %ld column %ld - "
+	      "Query missing paragraph break?\n",
+	      linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 	else
 	    cnt_punct++;
     }
@@ -1382,10 +1360,10 @@
     if (s)
     {
 	if (pswit[ECHO_SWITCH])
-	    printf("\n%s\n",aline);
+	    g_print("\n%s\n",aline);
 	if (!pswit[OVERVIEW_SWITCH])
-	    printf("    Line %ld column %d - Query he/be error?\n",
-	      linecnt,(int)(s-aline)+1);
+	    g_print("    Line %ld column %ld - Query he/be error?\n",
+	      linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 	else
 	    cnt_word++;
     }
@@ -1405,10 +1383,10 @@
     if (s)
     {
 	if (pswit[ECHO_SWITCH])
-	    printf("\n%s\n",aline);
+	    g_print("\n%s\n",aline);
 	if (!pswit[OVERVIEW_SWITCH])
-	    printf("    Line %ld column %d - Query had/bad error?\n",
-	      linecnt,(int)(s-aline)+1);
+	    g_print("    Line %ld column %ld - Query had/bad error?\n",
+	      linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 	else
 	    cnt_word++;
     }
@@ -1418,10 +1396,10 @@
     if (s)
     {
 	if (pswit[ECHO_SWITCH])
-	    printf("\n%s\n",aline);
+	    g_print("\n%s\n",aline);
 	if (!pswit[OVERVIEW_SWITCH])
-	    printf("    Line %ld column %d - Query hut/but error?\n",
-	      linecnt,(int)(s-aline)+1);
+	    g_print("    Line %ld column %ld - Query hut/but error?\n",
+	      linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 	else
 	    cnt_word++;
     }
@@ -1440,10 +1418,11 @@
     if (s)
     {
 	if (pswit[ECHO_SWITCH])
-	    printf("\n%s\n",aline);
+	    g_print("\n%s\n",aline);
 	if (!pswit[OVERVIEW_SWITCH])
-	    printf("    Line %ld column %d - Query angled bracket with From\n",
-	      linecnt,(int)(s-aline)+1);
+	    g_print("    Line %ld column %ld - "
+	      "Query angled bracket with From\n",
+	      linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 	else
 	    cnt_punct++;
     }
@@ -1457,17 +1436,18 @@
  */
 void check_for_orphan_character(const char *aline)
 {
-    if (*aline && !aline[1])
+    gunichar c;
+    c=g_utf8_get_char(aline);
+    if (c && !*g_utf8_next_char(aline))
     {
-	if (*aline=='I' || *aline=='V' || *aline=='X' || *aline=='L' ||
-	  gcisdigit(*aline))
+	if (c=='I' || c=='V' || c=='X' || c=='L' || g_unichar_isdigit(c))
 	    ; /* Nothing - ignore numerals alone on a line. */
 	else
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column 1 - Query single character line\n",
+		g_print("    Line %ld column 1 - Query single character line\n",
 		  linecnt);
 	    else
 		cnt_punct++;
@@ -1487,10 +1467,10 @@
     if (s)
     {
 	if (pswit[ECHO_SWITCH])
-	    printf("\n%s\n",aline);
+	    g_print("\n%s\n",aline);
 	if (!pswit[OVERVIEW_SWITCH])
-	    printf("    Line %ld column %ld - Query I=exclamation mark?\n",
-	      linecnt,s-aline);
+	    g_print("    Line %ld column %ld - Query I=exclamation mark?\n",
+	      linecnt,g_utf8_pointer_to_offset(aline,s));
 	else
 	    cnt_punct++;
     }
@@ -1506,47 +1486,58 @@
 {
     const char *s,*t,*s1;
     int i;
+    gsize len;
     gboolean istypo;
     gchar *testword;
+    gunichar *decomposition;
     if (pswit[PARANOID_SWITCH])
     {
-	for (t=aline;strstr(t,". ");)
+	for (t=aline;t=strstr(t,". ");)
 	{
-	    t=strstr(t,". ");
 	    if (t==aline)
 	    {
-		t++;
+		t=g_utf8_next_char(t);
 		/* start of line punctuation is handled elsewhere */
 		continue;
 	    }
-	    if (!gcisalpha(t[-1]))
+	    if (!g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(t))))
 	    {
-		t++;
+		t=g_utf8_next_char(t);
 		continue;
 	    }
 	    if (warnings->isDutch)
 	    {
 		/* For Frank & Jeroen -- 's Middags case */
-		if (t[2]==CHAR_SQUOTE && t[3]>='a' && t[3]<='z' &&
-		  t[4]==CHAR_SPACE && t[5]>='A' && t[5]<='Z')
+		/* Each test guards the next, so a line that stops */
+		/* soon after ". " is never read beyond its NUL. */
+		const char *lc=t+3,*sp;
+		if (t[2]==CHAR_SQUOTE &&
+		  g_unichar_islower(g_utf8_get_char(lc)) &&
+		  *(sp=g_utf8_next_char(lc))==CHAR_SPACE &&
+		  g_unichar_isupper(g_utf8_get_char(sp+1)))
 		{
-		    t++;
+		    t=g_utf8_next_char(t);
 		    continue;
 		}
 	    }
-	    s1=t+2;
-	    while (*s1 && !gcisalpha(*s1) && !isdigit(*s1))
-		s1++;
-	    if (*s1>='a' && *s1<='z')
+	    s1=g_utf8_next_char(g_utf8_next_char(t));
+	    while (*s1 && !g_unichar_isalpha(g_utf8_get_char(s1)) &&
+	      !g_unichar_isdigit(g_utf8_get_char(s1)))
+		s1=g_utf8_next_char(s1);	/* skip to next letter/digit */
+	    if (g_unichar_islower(g_utf8_get_char(s1)))
 	    {
 		/* we have something to investigate */
 		istypo=TRUE;
 		/* so let's go back and find out */
-		for (s1=t-1;s1>=aline &&
-		  (gcisalpha(*s1) || gcisdigit(*s1) || *s1==CHAR_SQUOTE &&
-		  gcisalpha(s1[1]) && gcisalpha(s1[-1]));s1--)
+		for (s1=g_utf8_prev_char(t);s1 &&
+		  (g_unichar_isalpha(g_utf8_get_char(s1)) ||
+		  g_unichar_isdigit(g_utf8_get_char(s1)) ||
+		  g_utf8_get_char(s1)==CHAR_SQUOTE && s1>aline &&
+		  g_unichar_isalpha(g_utf8_get_char(g_utf8_next_char(s1))) &&
+		  g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(s1))));
+		  s1=g_utf8_find_prev_char(aline,s1))
 		    ;
-		s1++;
+		s1=s1?g_utf8_next_char(s1):aline; /* NULL: word starts the line */
 		s=strchr(s1,'.');
 		if (s)
 		    testword=g_strndup(s1,s-s1);
@@ -1555,18 +1546,23 @@
 		for (i=0;*abbrev[i];i++)
 		    if (!strcmp(testword,abbrev[i]))
 			istypo=FALSE;
-		if (gcisdigit(*testword))
+		if (g_unichar_isdigit(g_utf8_get_char(testword)))
 		    istypo=FALSE;
-		if (!testword[1])
+		if (!*g_utf8_next_char(testword))
 		    istypo=FALSE;
 		if (isroman(testword))
 		    istypo=FALSE;
 		if (istypo)
 		{
 		    istypo=FALSE;
-		    for (i=0;testword[i];i++)
-			if (strchr(vowels,testword[i]))
+		    for (s=testword;*s;s=g_utf8_next_char(s))
+		    {
+			decomposition=g_unicode_canonical_decomposition(
+			  g_utf8_get_char(s),&len);
+			if (g_utf8_strchr("aeiou",-1,decomposition[0]))
 			    istypo=TRUE;
+			g_free(decomposition);
+		    }
 		}
 		if (istypo &&
 		  (pswit[VERBOSE_SWITCH] || !g_tree_lookup(qperiod,testword)))
@@ -1574,16 +1570,16 @@
 		    g_tree_insert(qperiod,g_strdup(testword),
 		      GINT_TO_POINTER(1));
 		    if (pswit[ECHO_SWITCH])
-			printf("\n%s\n",aline);
+			g_print("\n%s\n",aline);
 		    if (!pswit[OVERVIEW_SWITCH])
-			printf("    Line %ld column %d - Extra period?\n",
-			  linecnt,(int)(t-aline)+1);
+			g_print("    Line %ld column %ld - Extra period?\n",
+			  linecnt,g_utf8_pointer_to_offset(aline,t)+1);
 		    else
 			cnt_punct++;
 		}
 		g_free(testword);
 	    }
-	    t++;
+	    t=g_utf8_next_char(t);
 	}
     }
 }
@@ -1597,6 +1593,7 @@
 {
     int i;
     const char *s,*wordstart;
+    gunichar c;
     gchar *inword,*t;
     if (pswit[TYPO_SWITCH])
     {
@@ -1609,19 +1606,21 @@
 		g_free(t);
 		continue;
 	    }
-	    inword=g_ascii_strdown(t,-1);
+	    inword=g_utf8_strdown(t,-1);
 	    g_free(t);
 	    for (i=0;*nocomma[i];i++)
 		if (!strcmp(inword,nocomma[i]))
 		{
-		    if (*s==',' || *s==';' || *s==':')
+		    c=g_utf8_get_char(s);
+		    if (c==',' || c==';' || c==':')
 		    {
 			if (pswit[ECHO_SWITCH])
-			    printf("\n%s\n",aline);
+			    g_print("\n%s\n",aline);
 			if (!pswit[OVERVIEW_SWITCH])
-			    printf("    Line %ld column %d - "
+			    g_print("    Line %ld column %ld - "
 			      "Query punctuation after %s?\n",
-			      linecnt,(int)(s-aline)+1,inword);
+			      linecnt,g_utf8_pointer_to_offset(aline,s)+1,
+			      inword);
 			else
 			    cnt_punct++;
 		    }
@@ -1629,14 +1628,16 @@
 	    for (i=0;*noperiod[i];i++)
 		if (!strcmp(inword,noperiod[i]))
 		{
-		    if (*s=='.' || *s=='!')
+		    c=g_utf8_get_char(s);
+		    if (c=='.' || c=='!')
 		    {
 			if (pswit[ECHO_SWITCH])
-			    printf("\n%s\n",aline);
+			    g_print("\n%s\n",aline);
 			if (!pswit[OVERVIEW_SWITCH])
-			    printf("    Line %ld column %d - "
+			    g_print("    Line %ld column %ld - "
 			      "Query punctuation after %s?\n",
-			      linecnt,(int)(s-aline)+1,inword);
+			      linecnt,g_utf8_pointer_to_offset(aline,s)+1,
+			      inword);
 			else
 			    cnt_punct++;
 		    }
@@ -1654,10 +1655,15 @@
  */
 void check_for_typos(const char *aline,struct warnings *warnings)
 {
-    const char *s,*wordstart;
-    gchar *inword,*testword;
-    int i,alower,vowel,consonant,*dupcnt;
-    gboolean isdup,istypo;
+    const char *s,*t,*nt,*wordstart;
+    gchar *inword;
+    gunichar *decomposition;
+    gchar *testword;
+    int i,vowel,consonant,*dupcnt;
+    gboolean isdup,istypo,alower;
+    gunichar c;
+    long offset,len;
+    gsize decomposition_len;
     for (s=aline;*s;)
     {
 	wordstart=s;
@@ -1670,10 +1676,10 @@
 	if (mixdigit(inword))
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - Query digit in %s\n",
-		  linecnt,(int)(wordstart-aline)+1,inword);
+		g_print("    Line %ld column %ld - Query digit in %s\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,wordstart)+1,inword);
 	    else
 		cnt_word++;
 	}
@@ -1684,14 +1690,15 @@
 	if (pswit[TYPO_SWITCH] || pswit[USERTYPO_SWITCH])
 	{
 	    istypo=FALSE;
-	    testword=g_strdup(inword);
-	    alower=0;
-	    for (i=0;i<(int)strlen(testword);i++)
+	    alower=FALSE;
+	    for (t=inword;*t;t=g_utf8_next_char(t))
 	    {
+		c=g_utf8_get_char(t);
+		nt=g_utf8_next_char(t);
 		/* lowercase for testing */
-		if (testword[i]>='a' && testword[i]<='z')
-		    alower=1;
-		if (alower && testword[i]>='A' && testword[i]<='Z')
+		if (g_unichar_islower(c))
+		    alower=TRUE;
+		if (alower && (g_unichar_isupper(c) || g_unichar_istitle(c)))
 		{
 		    /*
 		     * We have an uppercase mid-word. However, there are
@@ -1699,15 +1706,18 @@
 		     *   Mac and Mc like McGill
 		     *   French contractions like l'Abbe
 		     */
-		    if (i==2 && testword[0]=='m' && testword[1]=='c' ||
-		      i==3 && testword[0]=='m' && testword[1]=='a' &&
-		      testword[2]=='c' || i>0 && testword[i-1]==CHAR_SQUOTE)
+		    offset=g_utf8_pointer_to_offset(inword,t);
+		    if (offset==2 && !g_ascii_strncasecmp(inword,"mc",2) ||
+		      offset==3 &&
+		      !g_ascii_strncasecmp(inword,"mac",3) ||
+		      offset>0 &&	/* capital straight after an apostrophe */
+		      g_utf8_get_char(g_utf8_prev_char(t))==CHAR_SQUOTE)
 			; /* do nothing! */
 		    else
 			istypo=TRUE;
 		}
-		testword[i]=(char)tolower(testword[i]);
 	    }
+	    testword=g_utf8_casefold(inword,-1);
 	}
 	if (pswit[TYPO_SWITCH])
 	{
@@ -1715,13 +1725,14 @@
 	     * Check for certain unlikely two-letter combinations at word
 	     * start and end.
 	     */
-	    if (strlen(testword)>1)
+	    len=g_utf8_strlen(testword,-1);
+	    if (len>1)
 	    {
 		for (i=0;*nostart[i];i++)
-		    if (!strncmp(testword,nostart[i],2))
+		    if (g_str_has_prefix(testword,nostart[i]))
 			istypo=TRUE;
 		for (i=0;*noend[i];i++)
-		    if (!strncmp(testword+strlen(testword)-2,noend[i],2))
+		    if (g_str_has_suffix(testword,noend[i]))
 			istypo=TRUE;
 	    }
 	    /* ght is common, gbt never. Like that. */
@@ -1755,21 +1766,25 @@
 	     * Check for no vowels or no consonants.
 	     * If none, flag a typo.
 	     */
-	    if (!istypo && strlen(testword)>1)
+	    if (!istypo && len>1)
 	    {
 		vowel=consonant=0;
-		for (i=0;testword[i];i++)
+		for (t=testword;*t;t=g_utf8_next_char(t))
 		{
-		    if (testword[i]=='y' || gcisdigit(testword[i]))
+		    c=g_utf8_get_char(t);
+		    decomposition=
+		      g_unicode_canonical_decomposition(c,&decomposition_len);
+		    if (c=='y' || g_unichar_isdigit(c))
 		    {
 			/* Yah, this is loose. */
 			vowel++;
 			consonant++;
 		    }
-		    else if (strchr(vowels,testword[i]))
+		    else if (g_utf8_strchr("aeiou",-1,decomposition[0]))
 			vowel++;
 		    else
 			consonant++;
+		    g_free(decomposition);
 		}
 		if (!vowel || !consonant)
 		    istypo=TRUE;
@@ -1798,7 +1813,8 @@
 	     *   "d" for a missing apostrophe - he d
 	     *   "n" for "in"
 	     */
-	    if (!istypo && strlen(testword)==1 && strchr("slmijdn",*inword))
+	    if (!istypo && len==1 &&
+	      g_utf8_strchr("slmijdn",-1,g_utf8_get_char(inword)))
 		istypo=TRUE;
 	    if (istypo)
 	    {
@@ -1817,14 +1833,15 @@
 		if (!isdup)
 		{
 		    if (pswit[ECHO_SWITCH])
-			printf("\n%s\n",aline);
+			g_print("\n%s\n",aline);
 		    if (!pswit[OVERVIEW_SWITCH])
 		    {
-			printf("    Line %ld column %d - Query word %s",
-			  linecnt,(int)(wordstart-aline)+1,inword);
+			g_print("    Line %ld column %ld - Query word %s",
+			  linecnt,g_utf8_pointer_to_offset(aline,wordstart)+1,
+			  inword);
 			if (!pswit[VERBOSE_SWITCH])
-			    printf(" - not reporting duplicates");
-			printf("\n");
+			    g_print(" - not reporting duplicates");
+			g_print("\n");
 		    }
 		    else
 			cnt_word++;
@@ -1835,10 +1852,10 @@
 	if (!istypo && usertypo && g_tree_lookup(usertypo,testword))
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])  
-		printf("    Line %ld column %d - Query possible scanno %s\n",
-		  linecnt,(int)(wordstart-aline)+2,inword);
+		g_print("    Line %ld column %ld - Query possible scanno %s\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,wordstart)+2,inword);
 	}
 	if (pswit[TYPO_SWITCH] || pswit[USERTYPO_SWITCH])
 	    g_free(testword);
@@ -1848,10 +1865,11 @@
 	    if (!strcmp(inword,"0") || !strcmp(inword,"1"))
 	    {
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",aline);
+		    g_print("\n%s\n",aline);
 		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - Query standalone %s\n",
-		      linecnt,(int)(wordstart-aline)+2,inword);
+		    g_print("    Line %ld column %ld - Query standalone %s\n",
+		      linecnt,g_utf8_pointer_to_offset(aline,wordstart)+2,
+		      inword);
 		else
 		    cnt_word++;
 	    }
@@ -1873,63 +1891,73 @@
 void check_for_misspaced_punctuation(const char *aline,
   struct parities *parities,gboolean isemptyline)
 {
-    int i,llen;
     gboolean isacro,isellipsis;
     const char *s;
-    llen=strlen(aline);
-    for (i=1;i<llen;i++)
+    gunichar c,nc,pc,n2c;
+    c=g_utf8_get_char(aline);
+    nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
+    for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
     {
+	pc=c;
+	c=nc;
+	nc=g_utf8_get_char(g_utf8_next_char(s));
 	/* For each character in the line after the first. */
-	if (strchr(".?!,;:_",aline[i]))  /* if it's punctuation */
+	if (g_utf8_strchr(".?!,;:_",-1,c))  /* if it's punctuation */
 	{
 	    /* we need to suppress warnings for acronyms like M.D. */
 	    isacro=FALSE;
 	    /* we need to suppress warnings for ellipsis . . . */
 	    isellipsis=FALSE;
-	    /* if there are letters on both sides of it or ... */
-	    if (gcisalpha(aline[i-1]) && gcisalpha(aline[i+1]) ||
-	       gcisalpha(aline[i+1]) && strchr("?!,;:",aline[i]))
+	    /*
+	     * If there are letters on both sides of it or
+	     * if it's strict punctuation followed by an alpha.
+	     */
+	    if (g_unichar_isalpha(nc) && (g_unichar_isalpha(pc) ||
+	      g_utf8_strchr("?!,;:",-1,c)))
 	    {
-		/* ...if it's strict punctuation followed by an alpha */
-		if (aline[i]=='.')
+		if (c=='.')
 		{
-		    if (i>2 && aline[i-2]=='.')
+		    if (g_utf8_pointer_to_offset(aline,s)>2 &&
+		      g_utf8_get_char(g_utf8_offset_to_pointer(s,-2))=='.')
 			isacro=TRUE;
-		    if (i+2<llen && aline[i+2]=='.')
+		    n2c=g_utf8_get_char(g_utf8_next_char(g_utf8_next_char(s)));
+		    if (nc && n2c=='.')
 			isacro=TRUE;
 		}
 		if (!isacro)
 		{
 		    if (pswit[ECHO_SWITCH])
-			printf("\n%s\n",aline);
+			g_print("\n%s\n",aline);
 		    if (!pswit[OVERVIEW_SWITCH])
-			printf("    Line %ld column %d - Missing space?\n",
-			  linecnt,i+1);
+			g_print("    Line %ld column %ld - Missing space?\n",
+			  linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 		    else
 			cnt_punct++;
 		}
 	    }
-	    if (aline[i-1]==CHAR_SPACE &&
-	      (aline[i+1]==CHAR_SPACE || aline[i+1]==0))
+	    if (pc==CHAR_SPACE && (nc==CHAR_SPACE || !nc))
 	    {
 		/*
 		 * If there are spaces on both sides,
 		 * or space before and end of line.
 		 */
-		if (aline[i]=='.')
+		if (c=='.')
 		{
-		    if (i>2 && aline[i-2]=='.')
+		    if (g_utf8_pointer_to_offset(aline,s)>2 &&
+		      g_utf8_get_char(g_utf8_offset_to_pointer(s,-2))=='.')
 			isellipsis=TRUE;
-		    if (i+2<llen && aline[i+2]=='.')
+		    if (nc && g_utf8_get_char(	/* no look-ahead at end of line */
+		      g_utf8_next_char(g_utf8_next_char(s)))=='.')
 			isellipsis=TRUE;
 		}
 		if (!isemptyline && !isellipsis)
 		{
 		    if (pswit[ECHO_SWITCH])
-			printf("\n%s\n",aline);
+			g_print("\n%s\n",aline);
 		    if (!pswit[OVERVIEW_SWITCH])
-			printf("    Line %ld column %d - "
-			  "Spaced punctuation?\n",linecnt,i+1);
+			g_print("    Line %ld column %ld - "
+			  "Spaced punctuation?\n",linecnt,
+			  g_utf8_pointer_to_offset(aline,s)+1);
 		    else
 			cnt_punct++;
 		}
@@ -1937,25 +1965,28 @@
 	}
     }
     /* Split out the characters that CANNOT be preceded by space. */
-    llen=strlen(aline);
-    for (i=1;i<llen;i++)
+    c=g_utf8_get_char(aline);
+    nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
+    for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
     {
+	pc=c;
+	c=nc;
+	nc=g_utf8_get_char(g_utf8_next_char(s));
 	/* for each character in the line after the first */
-	if (strchr("?!,;:",aline[i]))
+	if (g_utf8_strchr("?!,;:",-1,c))
 	{
 	    /* if it's punctuation that _cannot_ have a space before it */
-	    if (aline[i-1]==CHAR_SPACE && !isemptyline &&
-	      aline[i+1]!=CHAR_SPACE)
+	    if (pc==CHAR_SPACE && !isemptyline && nc!=CHAR_SPACE)
 	    {
 		/*
-		 * If aline[i+1) DOES == space,
+		 * If nc DOES == space,
 		 * it was already reported just above.
 		 */
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",aline);
+		    g_print("\n%s\n",aline);
 		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - Spaced punctuation?\n",
-		      linecnt,i+1);
+		    g_print("    Line %ld column %ld - Spaced punctuation?\n",
+		      linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 		else
 		    cnt_punct++;
 	    }
@@ -1966,64 +1997,77 @@
      * This plugs a hole in the acronym code above.
      * Inelegant, but maintainable.
      */
-    llen=strlen(aline);
-    for (i=1;i<llen;i++)
+    c=g_utf8_get_char(aline);
+    nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
+    for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
     {
+	pc=c;
+	c=nc;
+	nc=g_utf8_get_char(g_utf8_next_char(s));
 	/* for each character in the line after the first */
-	if (aline[i]=='.')
+	if (c=='.')
 	{
 	    /* if it's a period */
-	    if (aline[i-1]==CHAR_SPACE && gcisalpha(aline[i+1]))
+	    if (pc==CHAR_SPACE && g_unichar_isalpha(nc))
 	    {
 		/*
 		 * If the period follows a space and
 		 * is followed by a letter.
 		 */
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",aline);
+		    g_print("\n%s\n",aline);
 		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - Spaced punctuation?\n",
-		      linecnt,i+1);
+		    g_print("    Line %ld column %ld - Spaced punctuation?\n",
+		      linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 		else
 		    cnt_punct++;
 	    }
 	}
     }
-    for (i=1;i<llen;i++)
+    c=g_utf8_get_char(aline);
+    nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
+    for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
     {
+	pc=c;
+	c=nc;
+	nc=g_utf8_get_char(g_utf8_next_char(s));
 	/* for each character in the line after the first */
-	if (aline[i]==CHAR_DQUOTE)
+	if (c==CHAR_DQUOTE)
 	{
-	    if (!strchr(" _-.'`,;:!/([{?}])",aline[i-1]) &&
-	      !strchr(" _-.'`,;:!/([{?}])",aline[i+1]) && aline[i+1] ||
-	      !strchr(" _-([{'`",aline[i-1]) && gcisalpha(aline[i+1]))
+	    if (!g_utf8_strchr(" _-.'`,;:!/([{?}])",-1,pc) &&
+	      !g_utf8_strchr(" _-.'`,;:!/([{?}])",-1,nc) && nc ||
+	      !g_utf8_strchr(" _-([{'`",-1,pc) && g_unichar_isalpha(nc))
 	    {
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",aline);
+		    g_print("\n%s\n",aline);
 		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - Unspaced quotes?\n",
-		      linecnt,i+1);
+		    g_print("    Line %ld column %ld - Unspaced quotes?\n",
+		      linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 		else
 		    cnt_punct++;
 	    }
 	}
     }
     /* Check parity of quotes. */
-    for (s=aline;*s;s++)
+    nc=g_utf8_get_char(aline);
+    for (s=aline;*s;s=g_utf8_next_char(s))
     {
-	if (*s==CHAR_DQUOTE)
+	c=nc;
+	nc=g_utf8_get_char(g_utf8_next_char(s));
+	if (c==CHAR_DQUOTE)
 	{
 	    parities->dquote=!parities->dquote;
 	    if (!parities->dquote)
 	    {
 		/* parity even */
-		if (!strchr("_-.'`/,;:!?)]} ",s[1]))
+		if (!g_utf8_strchr("_-.'`/,;:!?)]} ",-1,nc))
 		{
 		    if (pswit[ECHO_SWITCH])
-			printf("\n%s\n",aline);
+			g_print("\n%s\n",aline);
 		    if (!pswit[OVERVIEW_SWITCH])
-			printf("    Line %ld column %d - "
-			  "Wrongspaced quotes?\n",linecnt,(int)(s-aline)+1);
+			g_print("    Line %ld column %ld - "
+			  "Wrongspaced quotes?\n",
+			  linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 		    else
 			cnt_punct++;
 		}
@@ -2031,28 +2075,30 @@
 	    else
 	    {
 		/* parity odd */
-		if (!gcisalpha(s[1]) && !isdigit(s[1]) &&
-		  !strchr("_-/.'`([{$",s[1]) || !s[1])
+		if (!g_unichar_isalpha(nc) && !g_unichar_isdigit(nc) &&
+		  !g_utf8_strchr("_-/.'`([{$",-1,nc) || !nc)
 		{
 		    if (pswit[ECHO_SWITCH])
-			printf("\n%s\n",aline);
+			g_print("\n%s\n",aline);
 		    if (!pswit[OVERVIEW_SWITCH])
-			printf("    Line %ld column %d - "
-			  "Wrongspaced quotes?\n",linecnt,(int)(s-aline)+1);
+			g_print("    Line %ld column %ld - "
+			  "Wrongspaced quotes?\n",
+			  linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 		    else
 			cnt_punct++;
 		}
 	    }
 	}
     }
-    if (*aline==CHAR_DQUOTE)
+    if (g_utf8_get_char(aline)==CHAR_DQUOTE)
     {
-	if (strchr(",;:!?)]} ",aline[1]))
+	if (g_utf8_strchr(",;:!?)]} ",-1,
+	  g_utf8_get_char(g_utf8_next_char(aline))))
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column 1 - Wrongspaced quotes?\n",
+		g_print("    Line %ld column 1 - Wrongspaced quotes?\n",
 		  linecnt);
 	    else
 		cnt_punct++;
@@ -2060,24 +2106,28 @@
     }
     if (pswit[SQUOTE_SWITCH])
     {
-	for (s=aline;*s;s++)
+	nc=g_utf8_get_char(aline);
+	for (s=aline;*s;s=g_utf8_next_char(s))
 	{
-	    if ((*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE) &&
-	      (s==aline || s>aline && !gcisalpha(s[-1]) ||
-	      !gcisalpha(s[1])))
+	    c=nc;
+	    nc=g_utf8_get_char(g_utf8_next_char(s));
+	    if ((c==CHAR_SQUOTE || c==CHAR_OPEN_SQUOTE) && (s==aline ||
+	      s>aline &&
+	      !g_unichar_isalpha(g_utf8_get_char(g_utf8_prev_char(s))) ||
+	      !g_unichar_isalpha(nc)))
 	    {
 		parities->squote=!parities->squote;
 		if (!parities->squote)
 		{
 		    /* parity even */
-		    if (!strchr("_-.'`/\",;:!?)]} ",s[1]))
+		    if (!g_utf8_strchr("_-.'`/\",;:!?)]} ",-1,nc))
 		    {
 			if (pswit[ECHO_SWITCH])
-			    printf("\n%s\n",aline);
+			    g_print("\n%s\n",aline);
 			if (!pswit[OVERVIEW_SWITCH])
-			    printf("    Line %ld column %d - "
+			    g_print("    Line %ld column %ld - "
 			      "Wrongspaced singlequotes?\n",
-			      linecnt,(int)(s-aline)+1);
+			      linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 			else
 			    cnt_punct++;
 		    }
@@ -2085,15 +2135,15 @@
 		else
 		{
 		    /* parity odd */
-		    if (!gcisalpha(s[1]) && !isdigit(s[1]) &&
-		      !strchr("_-/\".'`",s[1]) || !s[1])
+		    if (!g_unichar_isalpha(nc) && !g_unichar_isdigit(nc) &&
+		      !g_utf8_strchr("_-/\".'`",-1,nc) || !nc)
 		    {
 			if (pswit[ECHO_SWITCH])
-			    printf("\n%s\n",aline);
+			    g_print("\n%s\n",aline);
 			if (!pswit[OVERVIEW_SWITCH])
-			    printf("    Line %ld column %d - "
+			    g_print("    Line %ld column %ld - "
 			      "Wrongspaced singlequotes?\n",
-			      linecnt,(int)(s-aline)+1);
+			      linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 			else
 			    cnt_punct++;
 		    }
@@ -2117,49 +2167,54 @@
  */
 void check_for_double_punctuation(const char *aline,struct warnings *warnings)
 {
-    int i,llen;
-    llen=strlen(aline);
-    for (i=0;i<llen;i++)
+    const char *s;
+    gunichar c,nc;
+    nc=g_utf8_get_char(aline);
+    for (s=aline;*s;s=g_utf8_next_char(s))
     {
+	c=nc;
+	nc=g_utf8_get_char(g_utf8_next_char(s));
 	/* for each punctuation character in the line */
-	if (strchr(".?!,;:",aline[i]) && strchr(".?!,;:",aline[i+1]) &&
-	  aline[i] && aline[i+1])
+	if (c && nc && g_utf8_strchr(".?!,;:",-1,c) &&
+	  g_utf8_strchr(".?!,;:",-1,nc))
 	{
 	    /* followed by punctuation, it's a query, unless . . . */
-	    if (aline[i]==aline[i+1] && (aline[i]=='.' || aline[i]=='?' ||
-	      aline[i]=='!') ||
-	      !warnings->dotcomma && aline[i]=='.' && aline[i+1]==',' ||
-	      warnings->isFrench && !strncmp(aline+i,",...",4) ||
-	      warnings->isFrench && !strncmp(aline+i,"...,",4) ||
-	      warnings->isFrench && !strncmp(aline+i,";...",4) ||
-	      warnings->isFrench && !strncmp(aline+i,"...;",4) ||
-	      warnings->isFrench && !strncmp(aline+i,":...",4) ||
-	      warnings->isFrench && !strncmp(aline+i,"...:",4) ||
-	      warnings->isFrench && !strncmp(aline+i,"!...",4) ||
-	      warnings->isFrench && !strncmp(aline+i,"...!",4) ||
-	      warnings->isFrench && !strncmp(aline+i,"?...",4) ||
-	      warnings->isFrench && !strncmp(aline+i,"...?",4))
+	    if (c==nc && (c=='.' || c=='?' || c=='!') ||
+	      !warnings->dotcomma && c=='.' && nc==',' ||
+	      warnings->isFrench && g_str_has_prefix(s,",...") ||
+	      warnings->isFrench && g_str_has_prefix(s,"...,") ||
+	      warnings->isFrench && g_str_has_prefix(s,";...") ||
+	      warnings->isFrench && g_str_has_prefix(s,"...;") ||
+	      warnings->isFrench && g_str_has_prefix(s,":...") ||
+	      warnings->isFrench && g_str_has_prefix(s,"...:") ||
+	      warnings->isFrench && g_str_has_prefix(s,"!...") ||
+	      warnings->isFrench && g_str_has_prefix(s,"...!") ||
+	      warnings->isFrench && g_str_has_prefix(s,"?...") ||
+	      warnings->isFrench && g_str_has_prefix(s,"...?"))
 	    {
-		if (warnings->isFrench && !strncmp(aline+i,",...",4) ||
-		  warnings->isFrench && !strncmp(aline+i,"...,",4) ||
-		  warnings->isFrench && !strncmp(aline+i,";...",4) ||
-		  warnings->isFrench && !strncmp(aline+i,"...;",4) ||
-		  warnings->isFrench && !strncmp(aline+i,":...",4) ||
-		  warnings->isFrench && !strncmp(aline+i,"...:",4) ||
-		  warnings->isFrench && !strncmp(aline+i,"!...",4) ||
-		  warnings->isFrench && !strncmp(aline+i,"...!",4) ||
-		  warnings->isFrench && !strncmp(aline+i,"?...",4) ||
-		  warnings->isFrench && !strncmp(aline+i,"...?",4))
-		    i+=4;
+		if (warnings->isFrench && g_str_has_prefix(s,",...") ||
+		  warnings->isFrench && g_str_has_prefix(s,"...,") ||
+		  warnings->isFrench && g_str_has_prefix(s,";...") ||
+		  warnings->isFrench && g_str_has_prefix(s,"...;") ||
+		  warnings->isFrench && g_str_has_prefix(s,":...") ||
+		  warnings->isFrench && g_str_has_prefix(s,"...:") ||
+		  warnings->isFrench && g_str_has_prefix(s,"!...") ||
+		  warnings->isFrench && g_str_has_prefix(s,"...!") ||
+		  warnings->isFrench && g_str_has_prefix(s,"?...") ||
+		  warnings->isFrench && g_str_has_prefix(s,"...?"))
+		{
+		    s+=s[4]?4:3;	/* never step onto/past the final NUL */
+		    nc=g_utf8_get_char(g_utf8_next_char(s));
+		}
 		; /* do nothing for .. !! and ?? which can be legit */
 	    }
 	    else
 	    {
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",aline);
+		    g_print("\n%s\n",aline);
 		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - Double punctuation?\n",
-		      linecnt,i+1);
+		    g_print("    Line %ld column %ld - Double punctuation?\n",
+		      linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 		else
 		    cnt_punct++;
 	    }
@@ -2177,37 +2232,37 @@
     while ((t=strstr(s," \" ")))
     {
 	if (pswit[ECHO_SWITCH])
-	    printf("\n%s\n",aline);
+	    g_print("\n%s\n",aline);
 	if (!pswit[OVERVIEW_SWITCH])
-	    printf("    Line %ld column %d - Spaced doublequote?\n",
-	      linecnt,(int)(t-aline+1));
+	    g_print("    Line %ld column %ld - Spaced doublequote?\n",
+	      linecnt,g_utf8_pointer_to_offset(aline,t)+1);
 	else
 	    cnt_punct++;
-	s=t+2;
+	s=g_utf8_next_char(g_utf8_next_char(t));
     }
     s=aline;
     while ((t=strstr(s," ' ")))
     {
 	if (pswit[ECHO_SWITCH])
-	    printf("\n%s\n",aline);
+	    g_print("\n%s\n",aline);
 	if (!pswit[OVERVIEW_SWITCH])
-	    printf("    Line %ld column %d - Spaced singlequote?\n",
-	      linecnt,(int)(t-aline+1));
+	    g_print("    Line %ld column %ld - Spaced singlequote?\n",
+	      linecnt,g_utf8_pointer_to_offset(aline,t)+1);
 	else
 	    cnt_punct++;
-	s=t+2;
+	s=g_utf8_next_char(g_utf8_next_char(t));
     }
     s=aline;
     while ((t=strstr(s," ` ")))
     {
 	if (pswit[ECHO_SWITCH])
-	    printf("\n%s\n",aline);
+	    g_print("\n%s\n",aline);
 	if (!pswit[OVERVIEW_SWITCH])
-	    printf("    Line %ld column %d - Spaced singlequote?\n",
-	      linecnt,(int)(t-aline+1));
+	    g_print("    Line %ld column %ld - Spaced singlequote?\n",
+	      linecnt,g_utf8_pointer_to_offset(aline,t)+1);
 	else
 	    cnt_punct++;
-	s=t+2;
+	s=g_utf8_next_char(g_utf8_next_char(t));
     }
 }
 
@@ -2219,22 +2274,26 @@
 void check_for_miscased_genative(const char *aline)
 {
     const char *s;
+    gunichar c,nc,pc;
     if (!*aline)
 	return;
-    s=aline+1;
-    while (*s)
+    c=g_utf8_get_char(aline);
+    nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
+    for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
     {
-	if (*s==CHAR_SQUOTE && s[1]=='S' && s[-1]>='a' && s[-1]<='z')
+	pc=c;
+	c=nc;
+	nc=g_utf8_get_char(g_utf8_next_char(s));
+	if (c==CHAR_SQUOTE && nc=='S' && g_unichar_islower(pc))
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - Capital \"S\"?\n",
-		  linecnt,(int)(s-aline+2));
+		g_print("    Line %ld column %ld - Capital \"S\"?\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,s)+2);
 	    else
 		cnt_punct++;
 	}
-	s++;
     }
 }
 
@@ -2248,29 +2307,34 @@
  */
 void check_end_of_line(const char *aline,struct warnings *warnings)
 {
-    int i,llen;
-    llen=strlen(aline);
-    if (llen>1)
+    int lbytes;
+    const char *s;
+    gunichar c1,c2;
+    lbytes=strlen(aline);
+    if (g_utf8_strlen(aline,lbytes)>1)
     {
-	if (aline[llen-1]==CHAR_DQUOTE || aline[llen-1]==CHAR_SQUOTE ||
-	  aline[llen-1]==CHAR_OPEN_SQUOTE)
-	    if (aline[llen-2]==CHAR_SPACE)
-	    {
-		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",aline);
-		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - Spaced quote?\n",
-		      linecnt,llen);
-		else
-		    cnt_punct++;
-	    }
-	if ((aline[0]==CHAR_SQUOTE || aline[0]==CHAR_OPEN_SQUOTE) &&
-	  aline[1]==CHAR_SPACE)
+	s=g_utf8_prev_char(aline+lbytes);
+	c1=g_utf8_get_char(s);
+	c2=g_utf8_get_char(g_utf8_prev_char(s));
+	if ((c1==CHAR_DQUOTE || c1==CHAR_SQUOTE || c1==CHAR_OPEN_SQUOTE) &&
+	  c2==CHAR_SPACE)
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column 1 - Spaced quote?\n",linecnt);
+		g_print("    Line %ld column %ld - Spaced quote?\n",linecnt,
+		  g_utf8_strlen(aline,lbytes));
+	    else
+		cnt_punct++;
+	}
+	c1=g_utf8_get_char(aline);
+	c2=g_utf8_get_char(g_utf8_next_char(aline));
+	if ((c1==CHAR_SQUOTE || c1==CHAR_OPEN_SQUOTE) && c2==CHAR_SPACE)
+	{
+	    if (pswit[ECHO_SWITCH])
+		g_print("\n%s\n",aline);
+	    if (!pswit[OVERVIEW_SWITCH])
+		g_print("    Line %ld column 1 - Spaced quote?\n",linecnt);
 	    else
 		cnt_punct++;
 	}
@@ -2280,15 +2344,18 @@
 	 */
 	if (pswit[PARANOID_SWITCH] && warnings->hyphen)
 	{
-	    for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)
+	    for (s=g_utf8_prev_char(aline+lbytes);
+	      s>aline && g_utf8_get_char(s)<=CHAR_SPACE;s=g_utf8_prev_char(s))
 		;
-	    if (aline[i]=='-' && aline[i-1]!='-')
+	    if (g_utf8_get_char(s)=='-' &&
+	      g_utf8_get_char(g_utf8_prev_char(s))!='-')
 	    {
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",aline);
+		    g_print("\n%s\n",aline);
 		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - Hyphen at end of line?\n",
-		      linecnt,i);
+		    g_print("    Line %ld column %ld - "
+		      "Hyphen at end of line?\n",
+		      linecnt,g_utf8_pointer_to_offset(aline,s));
 	    }
 	}
     }
@@ -2302,19 +2369,26 @@
  */
 void check_for_unspaced_bracket(const char *aline)
 {
-    int i,llen;
-    llen=strlen(aline);
-    for (i=1;i<llen-1;i++)
+    const char *s;
+    gunichar c,nc,pc;
+    c=g_utf8_get_char(aline);
+    nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
+    for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
     {
+	pc=c;
+	c=nc;
+	nc=g_utf8_get_char(g_utf8_next_char(s));
+	if (!nc)
+	    break;
 	/* for each bracket character in the line except 1st & last */
-	if (strchr("{[()]}",aline[i]) && gcisalpha(aline[i-1]) &&
-	  gcisalpha(aline[i+1]))
+	if (g_utf8_strchr("{[()]}",-1,c) &&
+	  g_unichar_isalpha(pc) && g_unichar_isalpha(nc))
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - Unspaced bracket?\n",
-		  linecnt,i);
+		g_print("    Line %ld column %ld - Unspaced bracket?\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,s));
 	    else
 		cnt_punct++;
 	}
@@ -2326,18 +2400,24 @@
  */
 void check_for_unpunctuated_endquote(const char *aline)
 {
-    int i,llen;
-    llen=strlen(aline);
-    for (i=1;i<llen;i++)
+    const char *s;
+    gunichar c,nc,pc;
+    c=g_utf8_get_char(aline);
+    nc=c?g_utf8_get_char(g_utf8_next_char(aline)):0;
+    for (s=g_utf8_next_char(aline);nc;s=g_utf8_next_char(s))
     {
+	pc=c;
+	c=nc;
+	nc=g_utf8_get_char(g_utf8_next_char(s));
 	/* for each character in the line except 1st */
-	if (aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))
+	if (c==CHAR_DQUOTE && g_unichar_isalpha(pc))
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column %d - "
-		  "endquote missing punctuation?\n",linecnt,i);
+		g_print("    Line %ld column %ld - "
+		  "endquote missing punctuation?\n",
+		  linecnt,g_utf8_pointer_to_offset(aline,s));
 	    else
 		cnt_punct++;
 	}
@@ -2354,25 +2434,25 @@
  */
 void check_for_html_tag(const char *aline)
 {
-    int i;
     const char *open,*close;
-    open=strstr(aline,"<");
+    gchar *tag;
+    open=strchr(aline,'<');
     if (open)
     {
-	close=strstr(aline,">");
+	close=strchr(g_utf8_next_char(open),'>');
 	if (close)
 	{
-	    i=(int)(close-open+1);
-	    if (i>0)
+	    if (pswit[ECHO_SWITCH])
+		g_print("\n%s\n",aline);
+	    if (!pswit[OVERVIEW_SWITCH])
 	    {
-		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",aline);
-		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - HTML Tag? %*.*s \n",
-		      linecnt,(int)(open-aline)+1,i,i,open);
-		else
-		    cnt_html++;
+		tag=g_strndup(open,close-open+1);
+		g_print("    Line %ld column %ld - HTML Tag? %s \n",
+		  linecnt,g_utf8_pointer_to_offset(aline,open)+1,tag);
+		g_free(tag);
 	    }
+	    else
+		cnt_html++;
 	}
     }
 }
@@ -2387,25 +2467,28 @@
  */
 void check_for_html_entity(const char *aline)
 {
-    int i;
     const char *s,*amp,*scolon;
-    amp=strstr(aline,"&");
+    gchar *entity;
+    amp=strchr(aline,'&');
     if (amp)
     {
-	scolon=strstr(aline,";");
+	scolon=strchr(amp,';');
 	if (scolon)
 	{
-	    i=(int)(scolon-amp+1);
-	    for (s=amp;s<scolon;s++)   
-		if (*s==CHAR_SPACE)
-		    i=0;		/* Don't report "Jones & Son;" */
-	    if (i>0)
+	    for (s=amp;s<scolon;s=g_utf8_next_char(s))   
+		if (g_utf8_get_char(s)==CHAR_SPACE)
+		    break;		/* Don't report "Jones & Son;" */
+	    if (s>=scolon)
 	    {
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",aline);
+		    g_print("\n%s\n",aline);
 		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - HTML symbol? %*.*s \n",
-		      linecnt,(int)(amp-aline)+1,i,i,amp);
+		{
+		    entity=g_strndup(amp,scolon-amp+1);
+		    g_print("    Line %ld column %ld - HTML symbol? %s \n",
+		      linecnt,g_utf8_pointer_to_offset(aline,amp)+1,entity);
+		    g_free(entity);
+		}
 		else
 		    cnt_html++;
 	    }
@@ -2425,18 +2508,20 @@
   struct pending *pending)
 {
     const char *s;
+    gunichar c;
     s=aline;
     while (*s==' ')
 	s++;
+    c=g_utf8_get_char(s);
     if (pending->dquote)
     {
-	if (*s!=CHAR_DQUOTE || pswit[QPARA_SWITCH])
+	if (c!=CHAR_DQUOTE || pswit[QPARA_SWITCH])
 	{
 	    if (!pswit[OVERVIEW_SWITCH])
 	    {
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",parastart);
-		puts(pending->dquote);
+		    g_print("\n%s\n",parastart);
+		g_print("%s\n",pending->dquote);
 	    }
 	    else
 		cnt_dquot++;
@@ -2446,14 +2531,14 @@
     }
     if (pending->squote)
     {
-	if (*s!=CHAR_SQUOTE && *s!=CHAR_OPEN_SQUOTE || pswit[QPARA_SWITCH] ||
+	if (c!=CHAR_SQUOTE && c!=CHAR_OPEN_SQUOTE || pswit[QPARA_SWITCH] ||
 	  pending->squot)
 	{
 	    if (!pswit[OVERVIEW_SWITCH])
 	    {
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",parastart);
-		puts(pending->squote);
+		    g_print("\n%s\n",parastart);
+		g_print("%s\n",pending->squote);
 	    }
 	    else
 		cnt_squot++;
@@ -2466,8 +2551,8 @@
 	if (!pswit[OVERVIEW_SWITCH])
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",parastart);
-	    puts(pending->rbrack);
+		g_print("\n%s\n",parastart);
+	    g_print("%s\n",pending->rbrack);
 	}
 	else
 	    cnt_brack++;
@@ -2479,8 +2564,8 @@
 	if (!pswit[OVERVIEW_SWITCH])
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",parastart);
-	    puts(pending->sbrack);
+		g_print("\n%s\n",parastart);
+	    g_print("%s\n",pending->sbrack);
 	}
 	else
 	    cnt_brack++;
@@ -2492,8 +2577,8 @@
 	if (!pswit[OVERVIEW_SWITCH])
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",parastart);
-	    puts(pending->cbrack);
+		g_print("\n%s\n",parastart);
+	    g_print("%s\n",pending->cbrack);
 	}
 	else
 	    cnt_brack++;
@@ -2505,8 +2590,8 @@
 	if (!pswit[OVERVIEW_SWITCH])
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",parastart);
-	    puts(pending->unders);
+		g_print("\n%s\n",parastart);
+	    g_print("%s\n",pending->unders);
 	}
 	else
 	    cnt_brack++;
@@ -2577,12 +2662,14 @@
 void check_for_omitted_punctuation(const char *prevline,
   struct line_properties *last,int start_para_line)
 {
-    int i;
+    gboolean letter_on_line=FALSE;
     const char *s;
-    for (s=prevline,i=0;*s && !i;s++)
-	if (gcisletter(*s))
-	    /* use i to indicate the presence of a letter on the line */
-	    i=1;
+    for (s=prevline;*s;s=g_utf8_next_char(s))
+	if (g_unichar_isalpha(g_utf8_get_char(s)))
+	{
+	    letter_on_line=TRUE;
+	    break;
+	}
     /*
      * This next "if" is a problem.
      * If we say "start_para_line <= linecnt - 1", that includes
@@ -2590,28 +2677,30 @@
      * If we say "start_para_line < linecnt - 1" it doesn't, but then it
      * misses genuine one-line paragraphs.
      */
-    if (i && last->blen>2 && start_para_line<linecnt-1 && *prevline>CHAR_SPACE)
+    if (letter_on_line && last->blen>2 && start_para_line<linecnt-1 &&
+      g_utf8_get_char(prevline)>CHAR_SPACE)
     {
-	for (i=strlen(prevline)-1;
-	  (prevline[i]==CHAR_DQUOTE || prevline[i]==CHAR_SQUOTE) &&
-	  prevline[i]>CHAR_SPACE && i>0;
-	  i--)
+	for (s=g_utf8_prev_char(prevline+strlen(prevline));
+	  (g_utf8_get_char(s)==CHAR_DQUOTE ||
+	  g_utf8_get_char(s)==CHAR_SQUOTE) &&
+	  g_utf8_get_char(s)>CHAR_SPACE && s>prevline;
+	  s=g_utf8_prev_char(s))
 	    ;
-	for (;i>0;i--)
+	for (;s>prevline;s=g_utf8_prev_char(s))
 	{
-	    if (gcisalpha(prevline[i]))
+	    if (g_unichar_isalpha(g_utf8_get_char(s)))
 	    {
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",prevline);
+		    g_print("\n%s\n",prevline);
 		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - "
+		    g_print("    Line %ld column %ld - "
 		      "No punctuation at para end?\n",
-		      linecnt-1,(int)strlen(prevline));
+		      linecnt-1,g_utf8_strlen(prevline,-1));
 		else
 		    cnt_punct++;
 		break;
 	    }
-	    if (strchr("-.:!([{?}])",prevline[i]))
+	    if (g_utf8_strchr("-.:!([{?}])",-1,g_utf8_get_char(s)))
 		break;
 	}
     }
@@ -2622,11 +2711,60 @@
     const char *word=key;
     int *dupcnt=value;
     if (*dupcnt)
-	printf("\nNote: Queried word %s was duplicated %d times\n",
+	g_print("\nNote: Queried word %s was duplicated %d times\n",
 	  word,*dupcnt);
     return FALSE;
 }
 
+/*
+ * print_as_windows_1252:
+ *
+ * Print handler for g_set_print_handler(): writes the UTF-8 string to
+ * stdout re-encoded as WINDOWS-1252. The converter is opened on first use
+ * and kept in a static so that later calls reuse it; pass NULL to close
+ * it again. Input which can't be converted is replaced by '?' instead of
+ * cutting the message short. If no converter can be opened, the string is
+ * written as it stands.
+ */
+void print_as_windows_1252(const char *string)
+{
+    gsize inbytes,outbytes,skip;
+    gchar *buf,*bp;
+    static GIConv converter=(GIConv)-1;
+    if (!string)
+    {
+	if (converter!=(GIConv)-1)
+	    g_iconv_close(converter);
+	converter=(GIConv)-1;
+	return;
+    }
+    if (converter==(GIConv)-1)
+	converter=g_iconv_open("WINDOWS-1252","UTF-8");
+    if (converter!=(GIConv)-1)
+    {
+	/* One byte out per character in, so strlen(string) is enough room. */
+	inbytes=outbytes=strlen(string);
+	bp=buf=g_malloc(outbytes+1);
+	while (inbytes && outbytes && g_iconv(converter,(char **)&string,
+	  &inbytes,&bp,&outbytes)==(gsize)-1)
+	{
+	    /* Stopped at a bad sequence: step over it and carry on. */
+	    skip=g_utf8_next_char(string)-string;
+	    if (skip>inbytes)
+		skip=inbytes;
+	    string+=skip;
+	    inbytes-=skip;
+	    *bp++='?';
+	    outbytes--;
+	}
+	*bp='\0';
+	fputs(buf,stdout);
+	g_free(buf);
+    }
+    else
+	fputs(string,stdout);
+}
+
 /*
  * procfile:
  *
@@ -2659,7 +2775,8 @@
 	    fprintf(stderr,"bookloupe: %s: %s\n",filename,err->message);
 	exit(1);
     }
-    fprintf(stdout,"\n\nFile: %s\n\n",filename);
+    g_set_print_handler(print_as_windows_1252);
+    g_print("\n\nFile: %s\n\n",filename);
     first_pass_results=first_pass(etext);
     warnings=report_first_pass(first_pass_results);
     qword=g_tree_new_full((GCompareDataFunc)strcmp,NULL,g_free,g_free);
@@ -2674,7 +2791,7 @@
 	linecnt++;
 	if (linecnt==1)
 	    isnewpara=TRUE;
-	if (pswit[DP_SWITCH] && !strncmp(aline,"-----File: ",11))
+	if (pswit[DP_SWITCH] && g_str_has_prefix(aline,"-----File: "))
 	    continue;    // skip DP page separators completely
 	if (linecnt<first_pass_results->firstline ||
 	  (first_pass_results->footerline>0 &&
@@ -2682,14 +2799,14 @@
 	{
 	    if (pswit[HEADER_SWITCH])
 	    {
-		if (!strncmp(aline,"Title:",6))
-		    printf("    %s\n",aline);
-		if (!strncmp(aline,"Author:",7))
-		    printf("    %s\n",aline);
-		if (!strncmp(aline,"Release Date:",13))
-		    printf("    %s\n",aline);
-		if (!strncmp(aline,"Edition:",8))
-		    printf("    %s\n\n",aline);
+		if (g_str_has_prefix(aline,"Title:"))
+		    g_print("    %s\n",aline);
+		if (g_str_has_prefix(aline,"Author:"))
+		    g_print("    %s\n",aline);
+		if (g_str_has_prefix(aline,"Release Date:"))
+		    g_print("    %s\n",aline);
+		if (g_str_has_prefix(aline,"Edition:"))
+		    g_print("    %s\n\n",aline);
 	    }
 	    continue;		/* skip through the header */
 	}
@@ -2706,36 +2823,38 @@
 	    parastart=g_strdup(aline);
 	    memset(&parities,0,sizeof(parities));  /* restart the quote count */
 	    s=aline;
-	    while (!gcisalpha(*s) && !gcisdigit(*s) && *s)
-		s++;
-	    if (*s>='a' && *s<='z')
+	    while (*s && !g_unichar_isalpha(g_utf8_get_char(s)) &&
+	      !g_unichar_isdigit(g_utf8_get_char(s)))
+		s=g_utf8_next_char(s);
+	    if (g_unichar_islower(g_utf8_get_char(s)))
 	    {
 		/* and its first letter is lowercase */
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",aline);
+		    g_print("\n%s\n",aline);
 		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - "
+		    g_print("    Line %ld column %ld - "
 		      "Paragraph starts with lower-case\n",
-		      linecnt,(int)(s-aline)+1);
+		      linecnt,g_utf8_pointer_to_offset(aline,s)+1);
 		else
 		    cnt_punct++;
 	    }
 	    isnewpara=FALSE; /* Signal the end of new para processing. */
 	}
 	/* Check for an em-dash broken at line end. */
-	if (enddash && *aline=='-')
+	if (enddash && g_utf8_get_char(aline)=='-')
 	{
 	    if (pswit[ECHO_SWITCH])
-		printf("\n%s\n",aline);
+		g_print("\n%s\n",aline);
 	    if (!pswit[OVERVIEW_SWITCH])
-		printf("    Line %ld column 1 - Broken em-dash?\n",linecnt);
+		g_print("    Line %ld column 1 - Broken em-dash?\n",linecnt);
 	    else
 		cnt_punct++;
 	}
 	enddash=FALSE;
-	for (s=aline+strlen(aline)-1;*s==' ' && s>aline;s--)
+	for (s=g_utf8_prev_char(aline+strlen(aline));
+	  g_utf8_get_char(s)==' ' && s>aline;s=g_utf8_prev_char(s))
 	    ;
-	if (s>=aline && *s=='-')
+	if (s>=aline && g_utf8_get_char(s)=='-')
 	    enddash=TRUE;
 	check_for_control_characters(aline);
 	if (warnings->bin)
@@ -2745,8 +2864,8 @@
 	if (warnings->shortline)
 	    check_for_short_line(aline,&last);
 	last.blen=last.len;
-	last.len=strlen(aline);
-	last.start=aline[0];
+	last.len=g_utf8_strlen(aline,-1);
+	last.start=g_utf8_get_char(aline);
 	check_for_starting_punctuation(aline);
 	if (warnings->dash)
 	{
@@ -2795,6 +2914,8 @@
 	g_tree_foreach(qword,report_duplicate_queries,NULL);
     g_tree_unref(qword);
     g_tree_unref(qperiod);
+    g_set_print_handler(NULL);
+    print_as_windows_1252(NULL);
 }
 
 /*
@@ -2807,14 +2928,15 @@
  */
 char *flgets(char **etext,long lcnt)
 {
-    char c;
-    int len;
+    gunichar c;
     gboolean isCR=FALSE;
     char *theline=*etext;
-    len=0;
-    for(;;)
+    char *eos=theline;
+    gchar *s;
+    for (;;)
     {
-	c=*(*etext)++;
+	c=g_utf8_get_char(*etext);
+	*etext=g_utf8_next_char(*etext);
 	if (!c)
 	    return NULL;
 	/* either way, it's end of line */
@@ -2828,9 +2950,13 @@
 		if (pswit[LINE_END_SWITCH])
 		{
 		    if (pswit[ECHO_SWITCH])
-			printf("\n%*.*s\n",len,len,theline);
+		    {
+			s=g_strndup(theline,eos-theline);
+			g_print("\n%s\n",s);
+			g_free(s);
+		    }
 		    if (!pswit[OVERVIEW_SWITCH])
-			printf("    Line %ld - No CR?\n",lcnt);
+			g_print("    Line %ld - No CR?\n",lcnt);
 		    else
 			cnt_lineend++;
 		}
@@ -2845,9 +2971,13 @@
 		if (pswit[LINE_END_SWITCH])
 		{
 		    if (pswit[ECHO_SWITCH])
-			printf("\n%*.*s\n",len,len,theline);
+		    {
+			s=g_strndup(theline,eos-theline);
+			g_print("\n%s\n",s);
+			g_free(s);
+		    }
 		    if (!pswit[OVERVIEW_SWITCH])
-			printf("    Line %ld - Two successive CRs?\n",lcnt);
+			g_print("    Line %ld - Two successive CRs?\n",lcnt);
 		    else
 			cnt_lineend++;
 		}
@@ -2859,19 +2989,23 @@
 	    if (pswit[LINE_END_SWITCH] && isCR)
 	    {
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%*.*s\n",len,len,theline);
+		{
+		    s=g_strndup(theline,eos-theline);
+		    g_print("\n%s\n",s);
+		    g_free(s);
+		}
 		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - CR without LF?\n",
-		      lcnt,len+1);
+		    g_print("    Line %ld column %ld - CR without LF?\n",
+		      lcnt,g_utf8_pointer_to_offset(theline,eos)+1);
 		else
 		    cnt_lineend++;
-		theline[len]=' ';
+		*eos=' ';
 	    }
 	    isCR=FALSE;
-	    len++;
+	    eos=g_utf8_next_char(eos);
 	}
     }
-    theline[len]='\0';
+    *eos='\0';
     if (pswit[MARKUP_SWITCH])  
 	postprocess_for_HTML(theline);
     if (pswit[DP_SWITCH])  
@@ -2886,55 +3020,55 @@
  * contains a mixture of alpha and digits. Generally, this is an
  * error, but may not be for cases like 4th or L5 12s. 3d.
  *
- * Returns: 0 if no error found, 1 if error.
+ * Returns: TRUE iff an error is found.
  */
-int mixdigit(const char *checkword)
+gboolean mixdigit(const char *checkword)
 {
-    int wehaveadigit,wehavealetter,firstdigits,query,wl;
-    const char *s;
-    wehaveadigit=wehavealetter=query=0;
-    for (s=checkword;*s;s++)
-	if (gcisalpha(*s))
-	    wehavealetter=1;
-	else
-	    if (gcisdigit(*s))
-		wehaveadigit=1;
+    gboolean wehaveadigit,wehavealetter,query;
+    const char *s,*nondigit;
+    wehaveadigit=wehavealetter=query=FALSE;
+    for (s=checkword;*s;s=g_utf8_next_char(s))
+	if (g_unichar_isalpha(g_utf8_get_char(s)))
+	    wehavealetter=TRUE;
+	else if (g_unichar_isdigit(g_utf8_get_char(s)))
+	    wehaveadigit=TRUE;
     if (wehaveadigit && wehavealetter)
     {
 	/* Now exclude common legit cases, like "21st" and "12l. 3s. 11d." */
-	query=1;
-	wl=strlen(checkword);
-	for (firstdigits=0;gcisdigit(checkword[firstdigits]);firstdigits++)
+	query=TRUE;
+	for (nondigit=checkword;g_unichar_isdigit(g_utf8_get_char(nondigit));
+	  nondigit=g_utf8_next_char(nondigit))
 	    ;
 	/* digits, ending in st, rd, nd, th of either case */
-	if (firstdigits+2==wl && (!g_ascii_strcasecmp(checkword+wl-2,"st") ||
-	  !g_ascii_strcasecmp(checkword+wl-2,"rd") ||
-	  !g_ascii_strcasecmp(checkword+wl-2,"nd") ||
-	  !g_ascii_strcasecmp(checkword+wl-2,"th")))
-	    query=0;
-	if (firstdigits+3==wl && (!g_ascii_strcasecmp(checkword+wl-3,"sts") ||
-	  !g_ascii_strcasecmp(checkword+wl-3,"rds") ||
-	  !g_ascii_strcasecmp(checkword+wl-3,"nds") ||
-	  !g_ascii_strcasecmp(checkword+wl-3,"ths")))
-	    query=0;
-	if (firstdigits+3==wl && (!g_ascii_strcasecmp(checkword+wl-4,"stly") ||
-	  !g_ascii_strcasecmp(checkword+wl-4,"rdly") ||
-	  !g_ascii_strcasecmp(checkword+wl-4,"ndly") ||
-	  !g_ascii_strcasecmp(checkword+wl-4,"thly")))
-	    query=0;
+	if (!g_ascii_strcasecmp(nondigit,"st") ||
+	  !g_ascii_strcasecmp(nondigit,"rd") ||
+	  !g_ascii_strcasecmp(nondigit,"nd") ||
+	  !g_ascii_strcasecmp(nondigit,"th"))
+	    query=FALSE;
+	if (!g_ascii_strcasecmp(nondigit,"sts") ||
+	  !g_ascii_strcasecmp(nondigit,"rds") ||
+	  !g_ascii_strcasecmp(nondigit,"nds") ||
+	  !g_ascii_strcasecmp(nondigit,"ths"))
+	    query=FALSE;
+	if (!g_ascii_strcasecmp(nondigit,"stly") ||
+	  !g_ascii_strcasecmp(nondigit,"rdly") ||
+	  !g_ascii_strcasecmp(nondigit,"ndly") ||
+	  !g_ascii_strcasecmp(nondigit,"thly"))
+	    query=FALSE;
 	/* digits, ending in l, L, s or d */
-	if (firstdigits+1==wl && (checkword[wl-1]=='l' ||
-	  checkword[wl-1]=='L' || checkword[wl-1]=='s' || checkword[wl-1]=='d'))
-	    query=0;
+	if (!g_ascii_strcasecmp(nondigit,"l") || !strcmp(nondigit,"s") ||
+	  !strcmp(nondigit,"d"))
+	    query=FALSE;
 	/*
 	 * L at the start of a number, representing Britsh pounds, like L500.
-	 * This is cute. We know the current word is mixeddigit. If the first
+	 * This is cute. We know the current word is mixed digit. If the first
 	 * letter is L, there must be at least one digit following. If both
 	 * digits and letters follow, we have a genuine error, else we have a
 	 * capital L followed by digits, and we accept that as a non-error.
 	 */
-	if (checkword[0]=='L' && !mixdigit(checkword+1))
-	    query=0;
+	if (g_utf8_get_char(checkword)=='L' &&
+	  !mixdigit(g_utf8_next_char(checkword)))
+	    query=FALSE;
     }
     return query;
 }
@@ -2951,11 +3085,13 @@
  */
 gchar *getaword(const char **ptr)
 {
-    int i;
-    const char *s;
+    const char *s,*t;
     GString *word;
+    gunichar c,pc;
     word=g_string_new(NULL);
-    for (;!gcisdigit(**ptr) && !gcisalpha(**ptr) && **ptr;(*ptr)++)
+    for (;!g_unichar_isdigit(g_utf8_get_char(*ptr)) &&
+      !g_unichar_isalpha(g_utf8_get_char(*ptr)) &&
+      **ptr;*ptr=g_utf8_next_char(*ptr))
 	;
     /*
      * Use a look-ahead to handle exceptions for numbers like 1,000 and 1.35.
@@ -2966,23 +3102,28 @@
      * the results and resume our normal programming.
      */
     s=*ptr;
-    for (;gcisdigit(*s) || gcisalpha(*s) || *s==',' || *s=='.';s++)
-	g_string_append_c(word,*s);
-    for (i=1;i+1<word->len;i++)
+    for (;g_unichar_isdigit(g_utf8_get_char(s)) ||
+      g_unichar_isalpha(g_utf8_get_char(s)) ||
+      g_utf8_get_char(s)==',' || g_utf8_get_char(s)=='.';s=g_utf8_next_char(s))
+	g_string_append_unichar(word,g_utf8_get_char(s));
+    /* Scan from the 2nd to the last-but-one character, if word has them. */
+    for (t=*word->str?g_utf8_next_char(word->str):word->str;
+      *t && *g_utf8_next_char(t);t=g_utf8_next_char(t))
     {
-	if (word->str[i]=='.' || word->str[i]==',')
+	c=g_utf8_get_char(t);
+	pc=g_utf8_get_char(g_utf8_prev_char(t));
+	if ((c=='.' || c==',') && g_unichar_isdigit(pc))
 	{
-	    if (gcisdigit(word->str[i-1]) && gcisdigit(word->str[i-1]))
-	    {
-		*ptr=s;
-		return g_string_free(word,FALSE);
-	    }
+	    *ptr=s;
+	    return g_string_free(word,FALSE);
 	}
     }
     /* we didn't find a punctuated number - do the regular getword thing */
     g_string_truncate(word,0);
-    for (;gcisdigit(**ptr) || gcisalpha(**ptr) || **ptr=='\'';(*ptr)++)
-	g_string_append_c(word,**ptr);
+    for (;g_unichar_isdigit(g_utf8_get_char(*ptr)) ||
+      g_unichar_isalpha(g_utf8_get_char(*ptr)) ||
+      g_utf8_get_char(*ptr)=='\'';*ptr=g_utf8_next_char(*ptr))
+	g_string_append_unichar(word,g_utf8_get_char(*ptr));
     return g_string_free(word,FALSE);
 }
 
@@ -3006,82 +3146,36 @@
     if (!t || !*t)
 	return FALSE;
     s=t;
-    while (*t=='m' && *t)
+    while (g_utf8_get_char(t)=='m' && *t)
 	t++;
-    if (*t=='d')
+    if (g_utf8_get_char(t)=='d')
 	t++;
-    if (*t=='c' && t[1]=='m')
+    if (g_str_has_prefix(t,"cm"))
 	t+=2;
-    if (*t=='c' && t[1]=='d')
+    if (g_str_has_prefix(t,"cd"))
 	t+=2;
-    while (*t=='c' && *t)
+    while (g_utf8_get_char(t)=='c' && *t)
 	t++;
-    if (*t=='x' && t[1]=='l')
+    if (g_str_has_prefix(t,"xl"))
 	t+=2;
-    if (*t=='x' && t[1]=='c')
+    if (g_str_has_prefix(t,"xc"))
 	t+=2;
-    if (*t=='l')
+    if (g_utf8_get_char(t)=='l')
 	t++;
-    while (*t=='x' && *t)
+    while (g_utf8_get_char(t)=='x' && *t)
 	t++;
-    if (*t=='i' && t[1]=='x')
+    if (g_str_has_prefix(t,"ix"))
 	t+=2;
-    if (*t=='i' && t[1]=='v')
+    if (g_str_has_prefix(t,"iv"))
 	t+=2;
-    if (*t=='v')
+    if (g_utf8_get_char(t)=='v')
 	t++;
-    while (*t=='i' && *t)
+    while (g_utf8_get_char(t)=='i' && *t)
 	t++;
     return !*t;
 }
 
 /*
- * gcisalpha:
- *
- * A version of isalpha() that is somewhat lenient on 8-bit texts.
- * If we use the standard function, 8-bit accented characters break
- * words, so that tete with accented characters appears to be two words, "t"
- * and "t", with 8-bit characters between them. This causes over-reporting of
- * errors. gcisalpha() recognizes accented letters from the CP1252 (Windows)
- * and ISO-8859-1 character sets, which are the most common PG 8-bit types.
- */
-gboolean gcisalpha(unsigned char c)
-{
-    if (c>='a' && c<='z')
-	return TRUE;
-    if (c>='A' && c<='Z')
-	return TRUE;
-    if (c<140)
-	return FALSE;
-    if (c>=192 && c!=208 && c!=215 && c!=222 && c!=240 && c!=247 && c!=254)
-	return TRUE;
-    if (c==140 || c==142 || c==156 || c==158 || c==159)
-	return TRUE;
-    return FALSE;
-}
-
-/*
- * gcisdigit:
- *
- * A version of isdigit() that doesn't get confused in 8-bit texts.
- */
-gboolean gcisdigit(unsigned char c)
-{   
-    return c>='0' && c<='9';
-}
-
-/*
- * gcisletter:
- *
- * A version of isletter() that doesn't get confused in 8-bit texts.
- * NB: this is ISO-8891-1-specific.
- */
-gboolean gcisletter(unsigned char c)
-{   
-    return c>='A' && c<='Z' || c>='a' && c<='z' || c>=192;
-}
-
-/*
  * postprocess_for_DP:
  *
  * Invoked with the -d switch from flgets().
@@ -3096,21 +3190,11 @@
     if (!*theline) 
 	return;
     for (i=0;*DPmarkup[i];i++)
-    {
-	s=strstr(theline,DPmarkup[i]);
-	while (s)
+	while ((s=strstr(theline,DPmarkup[i])))
 	{
 	    t=s+strlen(DPmarkup[i]);
-	    while (*t)
-	    {
-		*s=*t;
-		t++;
-		s++;
-	    }
-	    *s=0;
-	    s=strstr(theline,DPmarkup[i]);
+	    memmove(s,t,strlen(t)+1);
 	}
-    }
 }
 
 /*
@@ -3124,9 +3208,8 @@
  */
 void postprocess_for_HTML(char *theline)
 {
-    if (strchr(theline,'<') && strchr(theline,'>'))
-	while (losemarkup(theline))
-	    ;
+    while (losemarkup(theline))
+	;
     while (loseentities(theline))
 	;
 }
@@ -3135,25 +3218,16 @@
 {
     char *s,*t;
     int i;
-    if (!*theline) 
-	return NULL;
-    s=strstr(theline,"<");
-    t=strstr(theline,">");
+    s=strchr(theline,'<');
+    t=s?strchr(s,'>'):NULL;
     if (!s || !t)
 	return NULL;
     for (i=0;*markup[i];i++)
-	if (!tagcomp(s+1,markup[i]))
+	if (tagcomp(g_utf8_next_char(s),markup[i]))
 	{
-	    if (!t[1])
-	    {
-		*s=0;
-		return s;
-	    }
-	    else if (t>s)
-	    {
-		strcpy(s,t+1);
-		return s;
-	    }
+	    t=g_utf8_next_char(t);
+	    memmove(s,t,strlen(t)+1);
+	    return s;
 	}
     /* It's an unrecognized <xxx>. */
     return NULL;
@@ -3170,13 +3244,10 @@
 	s=strstr(theline,entities[i].htmlent);
 	if (s)
 	{
-	    t=malloc((size_t)strlen(s));
-	    if (!t)
-		return NULL;
-	    strcpy(t,s+strlen(entities[i].htmlent));
+	    t=g_strdup(s+strlen(entities[i].htmlent));
 	    strcpy(s,entities[i].textent);
 	    strcat(s,t);
-	    free(t);
+	    g_free(t);
 	    return theline;
 	}
     }
@@ -3185,34 +3256,29 @@
 	s=strstr(theline,entities[i].htmlnum);
 	if (s)
 	{
-	    t=malloc((size_t)strlen(s));
-	    if (!t)
-		return NULL;
-	    strcpy(t,s+strlen(entities[i].htmlnum));
+	    t=g_strdup(s+strlen(entities[i].htmlnum));
 	    strcpy(s,entities[i].textent);
 	    strcat(s,t);
-	    free(t);
+	    g_free(t);
 	    return theline;
 	}
     }
     return NULL;
 }
 
-int tagcomp(const char *strin,const char *basetag)
+gboolean tagcomp(const char *strin,const char *basetag)
 {
-    const char *s,*t;
-    s=basetag;
-    t=strin;
-    if (*t=='/')
-	t++; /* ignore a slash */
-    while (*s && *t)
-    {
-	if (tolower(*s)!=tolower(*t))
-	    return 1;
-	s++;
-	t++;
-    }
-    return 0;
+    gboolean retval;
+    gchar *s,*t;
+    if (g_utf8_get_char(strin)=='/')
+	t=g_utf8_casefold(g_utf8_next_char(strin),-1); /* ignore a slash */
+    else
+	t=g_utf8_casefold(strin,-1);
+    s=g_utf8_casefold(basetag,-1);
+    retval=g_str_has_prefix(t,s);
+    g_free(s);
+    g_free(t);
+    return retval;
 }
 
 void proghelp(GOptionContext *context)