# HG changeset patch
# User ali <ali@juiblex.co.uk>
# Date 1369583646 -3600
# Node ID 6b786cc05b3cb4a3a678e44112e0b966da590c70
# Parent  23b2ea51b029a4c26908ec0c2b048b274b44952f
Break check_for_typos() out

diff -r 23b2ea51b029 -r 6b786cc05b3c bookloupe/bookloupe.c
--- a/bookloupe/bookloupe.c	Sun May 26 16:39:48 2013 +0100
+++ b/bookloupe/bookloupe.c	Sun May 26 16:54:06 2013 +0100
@@ -1570,14 +1570,225 @@
 }
 
 /*
+ * check_for_typos:
+ *
+ * Check for commonly mistyped words,
+ * and digits like 0 for O in a word.
+ */
+void check_for_typos(const char *aline,struct warnings *warnings)
+{
+    const char *s,*wordstart;
+    char inword[MAXWORDLEN],testword[MAXWORDLEN]=""; /* may stay empty */
+    int i,istypo=0,isdup,alower,vowel,consonant; /* read even if tests off */
+    static int qword_index=0; /* NOTE(review): not reset per procfile() */
+    for (s=aline;*s;)
+    {
+	wordstart=s;
+	s=getaword(s,inword);
+	if (!*inword)
+	    continue; /* don't bother with empty lines */
+	if (mixdigit(inword))
+	{
+	    if (pswit[ECHO_SWITCH])
+		printf("\n%s\n",aline);
+	    if (!pswit[OVERVIEW_SWITCH])
+		printf("    Line %ld column %d - Query digit in %s\n",
+		  linecnt,(int)(wordstart-aline)+1,inword);
+	    else
+		cnt_word++;
+	}
+	/*
+	 * Put the word through a series of tests for likely typos and OCR
+	 * errors.
+	 */
+	if (pswit[TYPO_SWITCH])
+	{
+	    istypo=0;
+	    strcpy(testword,inword);
+	    alower=0;
+	    for (i=0;i<(signed int)strlen(testword);i++)
+	    {
+		/* lowercase for testing */
+		if (testword[i]>='a' && testword[i]<='z')
+		    alower=1;
+		if (alower && testword[i]>='A' && testword[i]<='Z')
+		{
+		    /*
+		     * We have an uppercase mid-word. However, there are
+		     * common cases:
+		     *   Mac and Mc like McGill
+		     *   French contractions like l'Abbe
+		     */
+		    if (i==2 && testword[0]=='m' && testword[1]=='c' ||
+		      i==3 && testword[0]=='m' && testword[1]=='a' &&
+		      testword[2]=='c' || i>0 && testword[i-1]==CHAR_SQUOTE)
+			; /* do nothing! */
+		    else
+			istypo=1;
+		}
+		testword[i]=(char)tolower((unsigned char)testword[i]);
+	    }
+	    /*
+	     * Check for certain unlikely two-letter combinations at word
+	     * start and end.
+	     */
+	    if (strlen(testword)>1)
+	    {
+		for (i=0;*nostart[i];i++)
+		    if (!strncmp(testword,nostart[i],2))
+			istypo=1;
+		for (i=0;*noend[i];i++)
+		    if (!strncmp(testword+strlen(testword)-2,noend[i],2))
+			istypo=1;
+	    }
+	    /* ght is common, gbt never. Like that. */
+	    if (strstr(testword,"cb"))
+		istypo=1;
+	    if (strstr(testword,"gbt"))
+		istypo=1;
+	    if (strstr(testword,"pbt"))
+		istypo=1;
+	    if (strstr(testword,"tbs"))
+		istypo=1;
+	    if (strstr(testword,"mrn"))
+		istypo=1;
+	    if (strstr(testword,"ahle"))
+		istypo=1;
+	    if (strstr(testword,"ihle"))
+		istypo=1;
+	    /*
+	     * "TBE" does happen - like HEARTBEAT - but uncommon.
+	     * Also "TBI" - frostbite, outbid - but uncommon.
+	     * Similarly "ii" like Hawaii, or Pompeii, and in Roman
+	     * numerals, but "ii" is a common scanno.
+	     */
+	    if (strstr(testword,"tbi"))
+		istypo=1;
+	    if (strstr(testword,"tbe"))
+		istypo=1;
+	    if (strstr(testword,"ii"))
+		istypo=1;
+	    /*
+	     * Check for no vowels or no consonants.
+	     * If none, flag a typo.
+	     */
+	    if (!istypo && strlen(testword)>1)
+	    {
+		vowel=consonant=0;
+		for (i=0;testword[i];i++)
+		{
+		    if (testword[i]=='y' || gcisdigit(testword[i]))
+		    {
+			/* Yah, this is loose. */
+			vowel++;
+			consonant++;
+		    }
+		    else if (strchr(vowels,testword[i]))
+			vowel++;
+		    else
+			consonant++;
+		}
+		if (!vowel || !consonant)
+		    istypo=1;
+	    }
+	    /*
+	     * Now exclude the word from being reported if it's in
+	     * the okword list.
+	     */
+	    for (i=0;*okword[i];i++)
+		if (!strcmp(testword,okword[i]))
+		    istypo=0;
+	    /*
+	     * What looks like a typo may be a Roman numeral.
+	     * Exclude these.
+	     */
+	    if (istypo && isroman(testword))
+		istypo=0;
+	    /* Check the manual list of typos. */
+	    if (!istypo)
+		for (i=0;*typo[i];i++)
+		    if (!strcmp(testword,typo[i]))
+			istypo=1;
+	    /*
+	     * Check lowercase s, l, i and m - special cases.
+	     *   "j" - often a semi-colon gone wrong.
+	     *   "d" for a missing apostrophe - he d
+	     *   "n" for "in"
+	     */
+	    if (!istypo && strlen(testword)==1 && strchr("slmijdn",*inword))
+		istypo=1;
+	    if (istypo)
+	    {
+		isdup=0;
+		if (strlen(testword)<MAX_QWORD_LENGTH &&
+		  !pswit[VERBOSE_SWITCH])
+		    for (i=0;i<qword_index;i++)
+			if (!strcmp(testword,qword[i]))
+			{
+			    isdup=1;
+			    ++dupcnt[i];
+			}
+		if (!isdup)
+		{
+		    if (qword_index<MAX_QWORD &&
+		      strlen(testword)<MAX_QWORD_LENGTH)
+		    {
+			strcpy(qword[qword_index],testword);
+			qword_index++;
+		    }
+		    if (pswit[ECHO_SWITCH])
+			printf("\n%s\n",aline);
+		    if (!pswit[OVERVIEW_SWITCH])
+		    {
+			printf("    Line %ld column %d - Query word %s",
+			  linecnt,(int)(wordstart-aline)+1,inword);
+			if (strlen(testword)<MAX_QWORD_LENGTH &&
+			  !pswit[VERBOSE_SWITCH])
+			    printf(" - not reporting duplicates");
+			printf("\n");
+		    }
+		    else
+			cnt_word++;
+		}
+	    }
+	}
+	/* Check the user's typo list; testword is "" if TYPO_SWITCH is off. */
+	if (!istypo && usertypo_count)
+	    for (i=0;i<usertypo_count;i++)
+		if (!strcmp(testword,usertypo[i]))
+		{
+		    if (pswit[ECHO_SWITCH])
+			printf("\n%s\n",aline);
+		    if (!pswit[OVERVIEW_SWITCH])
+			printf("    Line %ld column %d - "
+			  "Query possible scanno %s\n",
+			  linecnt,(int)(wordstart-aline)+2,inword);
+		}
+	if (pswit[PARANOID_SWITCH] && warnings->digit)
+	{
+	    /* In paranoid mode, query all 0 and 1 standing alone. */
+	    if (!strcmp(inword,"0") || !strcmp(inword,"1"))
+	    {
+		if (pswit[ECHO_SWITCH])
+		    printf("\n%s\n",aline);
+		if (!pswit[OVERVIEW_SWITCH])
+		    printf("    Line %ld column %d - Query standalone %s\n",
+		      linecnt,(int)(wordstart-aline)+2,inword);
+		else
+		    cnt_word++;
+	    }
+	}
+    }
+}
+
+/*
  * procfile:
  *
  * Process one file.
  */
 void procfile(char *filename)
 {
-    const char *s,*t,*wordstart;
-    char inword[MAXWORDLEN],testword[MAXWORDLEN];
+    const char *s,*t;
     char parastart[81];     /* first line of current para */
     FILE *infile;
     struct first_pass_results *first_pass_results;
@@ -1586,12 +1797,11 @@
     struct line_properties last={0};
     int isemptyline;
     long squot,start_para_line;
-    signed int i,llen,isacro,isellipsis,istypo,alower;
+    signed int i,llen,isacro,isellipsis;
     signed int dquotepar,squotepar;
-    signed int isnewpara,vowel,consonant;
+    signed int isnewpara;
     char dquote_err[80],squote_err[80],rbrack_err[80],sbrack_err[80],
       cbrack_err[80],unders_err[80];
-    signed int qword_index,isdup;
     signed int enddash;
     last.start=CHAR_SPACE;
     *dquote_err=*squote_err=*rbrack_err=*cbrack_err=*sbrack_err=
@@ -1599,9 +1809,7 @@
     linecnt=checked_linecnt=start_para_line=0;
     squot=0;
     i=llen=isacro=isellipsis=0;
-    isnewpara=vowel=consonant=enddash=0;
-    qword_index=0;
-    *inword=*testword=0;
+    isnewpara=enddash=0;
     dquotepar=squotepar=0;
     infile=fopen(filename,"rb");
     if (!infile)
@@ -1813,208 +2021,7 @@
 	check_for_pling_scanno(aline);
 	check_for_extra_period(aline,warnings);
 	check_for_following_punctuation(aline);
-        /*
-	 * Check for commonly mistyped words,
-	 * and digits like 0 for O in a word.
-	 */
-        for (s=aline;*s;)
-	{
-            wordstart=s;
-            s=getaword(s,inword);
-            if (!*inword)
-		continue; /* don't bother with empty lines */
-            if (mixdigit(inword))
-	    {
-                if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",aline);
-                if (!pswit[OVERVIEW_SWITCH])
-                    printf("    Line %ld column %d - Query digit in %s\n",
-		      linecnt,(int)(wordstart-aline)+1,inword);
-                else
-                    cnt_word++;
-	    }
-            /*
-	     * Put the word through a series of tests for likely typos and OCR
-	     * errors.
-	     */
-            if (pswit[TYPO_SWITCH])
-	    {
-                istypo=0;
-                strcpy(testword,inword);
-                alower=0;
-                for (i=0;i<(signed int)strlen(testword);i++)
-		{
-		    /* lowercase for testing */
-                    if (testword[i]>='a' && testword[i]<='z')
-			alower=1;
-                    if (alower && testword[i]>='A' && testword[i]<='Z')
-		    {
-                        /*
-			 * We have an uppercase mid-word. However, there are
-			 * common cases:
-                         *   Mac and Mc like McGill
-                         *   French contractions like l'Abbe
-			 */
-                        if (i==2 && testword[0]=='m' && testword[1]=='c' ||
-                          i==3 && testword[0]=='m' && testword[1]=='a' &&
-			  testword[2]=='c' || i>0 && testword[i-1]==CHAR_SQUOTE)
-			    ; /* do nothing! */
-                        else
-                            istypo=1;
-		    }
-                    testword[i]=(char)tolower(testword[i]);
-		}
-                /*
-		 * Check for certain unlikely two-letter combinations at word
-		 * start and end.
-		 */
-                if (strlen(testword)>1)
-		{
-                    for (i=0;*nostart[i];i++)
-                        if (!strncmp(testword,nostart[i],2))
-                            istypo=1;
-                    for (i=0;*noend[i];i++)
-                        if (!strncmp(testword+strlen(testword)-2,noend[i],2))
-                            istypo=1;
-		}
-                /* ght is common, gbt never. Like that. */
-                if (strstr(testword,"cb"))
-		    istypo=1;
-                if (strstr(testword,"gbt"))
-		    istypo=1;
-                if (strstr(testword,"pbt"))
-		    istypo=1;
-                if (strstr(testword,"tbs"))
-		    istypo=1;
-                if (strstr(testword,"mrn"))
-		    istypo=1;
-                if (strstr(testword,"ahle"))
-		    istypo=1;
-                if (strstr(testword,"ihle"))
-		    istypo=1;
-                /*
-		 * "TBE" does happen - like HEARTBEAT - but uncommon.
-                 * Also "TBI" - frostbite, outbid - but uncommon.
-                 * Similarly "ii" like Hawaii, or Pompeii, and in Roman
-		 * numerals, but "ii" is a common scanno.
-		 */
-                if (strstr(testword,"tbi"))
-		    istypo=1;
-                if (strstr(testword,"tbe"))
-		    istypo=1;
-                if (strstr(testword,"ii"))
-		    istypo=1;
-                /*
-		 * Check for no vowels or no consonants.
-                 * If none, flag a typo.
-		 */
-                if (!istypo && strlen(testword)>1)
-		{
-                    vowel=consonant=0;
-                    for (i=0;testword[i];i++)
-		    {
-                        if (testword[i]=='y' || gcisdigit(testword[i]))
-			{
-			    /* Yah, this is loose. */
-                            vowel++;
-                            consonant++;
-			}
-                        else if (strchr(vowels,testword[i]))
-			    vowel++;
-			else
-			    consonant++;
-		    }
-                    if (!vowel || !consonant)
-                        istypo=1;
-		}
-                /*
-		 * Now exclude the word from being reported if it's in
-                 * the okword list.
-		 */
-                for (i=0;*okword[i];i++)
-                    if (!strcmp(testword,okword[i]))
-                        istypo=0;
-                /*
-		 * What looks like a typo may be a Roman numeral.
-		 * Exclude these.
-		 */
-                if (istypo && isroman(testword))
-		    istypo=0;
-                /* Check the manual list of typos. */
-                if (!istypo)
-                    for (i=0;*typo[i];i++)
-                        if (!strcmp(testword,typo[i]))
-                            istypo=1;
-                /*
-		 * Check lowercase s, l, i and m - special cases.
-                 *   "j" - often a semi-colon gone wrong.
-                 *   "d" for a missing apostrophe - he d
-                 *   "n" for "in"
-		 */
-                if (!istypo && strlen(testword)==1 && strchr("slmijdn",*inword))
-		    istypo=1;
-                if (istypo)
-		{
-                    isdup=0;
-                    if (strlen(testword)<MAX_QWORD_LENGTH &&
-		      !pswit[VERBOSE_SWITCH])
-                        for (i=0;i<qword_index;i++)
-                            if (!strcmp(testword,qword[i]))
-			    {
-                                isdup=1;
-                                ++dupcnt[i];
-			    }
-                    if (!isdup)
-		    {
-                        if (qword_index<MAX_QWORD &&
-			  strlen(testword)<MAX_QWORD_LENGTH)
-			{
-                            strcpy(qword[qword_index],testword);
-                            qword_index++;
-			}
-                        if (pswit[ECHO_SWITCH])
-			    printf("\n%s\n",aline);
-                        if (!pswit[OVERVIEW_SWITCH])
-			{
-                            printf("    Line %ld column %d - Query word %s",
-			      linecnt,(int)(wordstart-aline)+1,inword);
-                            if (strlen(testword)<MAX_QWORD_LENGTH &&
-			      !pswit[VERBOSE_SWITCH])
-                                printf(" - not reporting duplicates");
-                            printf("\n");
-			}
-                        else
-                            cnt_word++;
-		    }
-		}
-	    }
-	    /* check the user's list of typos */
-	    if (!istypo && usertypo_count)
-		for (i=0;i<usertypo_count;i++)
-		    if (!strcmp(testword,usertypo[i]))
-		    {
-			if (pswit[ECHO_SWITCH])
-			    printf("\n%s\n",aline);
-			if (!pswit[OVERVIEW_SWITCH])  
-			    printf("    Line %ld column %d - "
-			      "Query possible scanno %s\n",
-			      linecnt,(int)(wordstart-aline)+2,inword);
-		    }
-            if (pswit[PARANOID_SWITCH] && warnings->digit)
-	    {
-		/* In paranoid mode, query all 0 and 1 standing alone. */
-                if (!strcmp(inword,"0") || !strcmp(inword,"1"))
-		{
-                    if (pswit[ECHO_SWITCH])
-			printf("\n%s\n",aline);
-                    if (!pswit[OVERVIEW_SWITCH])
-                        printf("    Line %ld column %d - Query standalone %s\n",
-			  linecnt,(int)(wordstart-aline)+2,inword);
-                    else
-                        cnt_word++;
-		}
-	    }
-	}
+	check_for_typos(aline,warnings);
 	/*
          * Look for added or missing spaces around punctuation and quotes.
          * If there is a punctuation character like ! with no space on