Use official HTML 4 character entity definitions
author ali <ali@juiblex.co.uk>
Thu May 30 17:16:37 2013 +0100 (2013-05-30)
changeset 71 82d3cc398b54
parent 70 aa916da2e452
child 72 52d4a7f926b4
Use official HTML 4 character entity definitions
bookloupe/HTMLlat1.ent
bookloupe/HTMLspecial.ent
bookloupe/HTMLsymbol.ent
bookloupe/Makefile.am
bookloupe/bookloupe.c
bookloupe/gen-html-entities.sh
configure.ac
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/bookloupe/HTMLlat1.ent	Thu May 30 17:16:37 2013 +0100
     1.3 @@ -0,0 +1,194 @@
     1.4 +<!-- Portions © International Organization for Standardization 1986
     1.5 +     Permission to copy in any form is granted for use with
     1.6 +     conforming SGML systems and applications as defined in
     1.7 +     ISO 8879, provided this notice is included in all copies.
     1.8 +-->
     1.9 +<!-- Character entity set. Typical invocation:
    1.10 +     <!ENTITY % HTMLlat1 PUBLIC
    1.11 +       "-//W3C//ENTITIES Latin 1//EN//HTML">
    1.12 +     %HTMLlat1;
    1.13 +-->
    1.14 +
    1.15 +<!ENTITY nbsp   CDATA "&#160;" -- no-break space = non-breaking space,
    1.16 +                                  U+00A0 ISOnum -->
    1.17 +<!ENTITY iexcl  CDATA "&#161;" -- inverted exclamation mark, U+00A1 ISOnum -->
    1.18 +<!ENTITY cent   CDATA "&#162;" -- cent sign, U+00A2 ISOnum -->
    1.19 +<!ENTITY pound  CDATA "&#163;" -- pound sign, U+00A3 ISOnum -->
    1.20 +<!ENTITY curren CDATA "&#164;" -- currency sign, U+00A4 ISOnum -->
    1.21 +<!ENTITY yen    CDATA "&#165;" -- yen sign = yuan sign, U+00A5 ISOnum -->
    1.22 +<!ENTITY brvbar CDATA "&#166;" -- broken bar = broken vertical bar,
    1.23 +                                  U+00A6 ISOnum -->
    1.24 +<!ENTITY sect   CDATA "&#167;" -- section sign, U+00A7 ISOnum -->
    1.25 +<!ENTITY uml    CDATA "&#168;" -- diaeresis = spacing diaeresis,
    1.26 +                                  U+00A8 ISOdia -->
    1.27 +<!ENTITY copy   CDATA "&#169;" -- copyright sign, U+00A9 ISOnum -->
    1.28 +<!ENTITY ordf   CDATA "&#170;" -- feminine ordinal indicator, U+00AA ISOnum -->
    1.29 +<!ENTITY laquo  CDATA "&#171;" -- left-pointing double angle quotation mark
    1.30 +                                  = left pointing guillemet, U+00AB ISOnum -->
    1.31 +<!ENTITY not    CDATA "&#172;" -- not sign, U+00AC ISOnum -->
    1.32 +<!ENTITY shy    CDATA "&#173;" -- soft hyphen = discretionary hyphen,
    1.33 +                                  U+00AD ISOnum -->
    1.34 +<!ENTITY reg    CDATA "&#174;" -- registered sign = registered trade mark sign,
    1.35 +                                  U+00AE ISOnum -->
    1.36 +<!ENTITY macr   CDATA "&#175;" -- macron = spacing macron = overline
    1.37 +                                  = APL overbar, U+00AF ISOdia -->
    1.38 +<!ENTITY deg    CDATA "&#176;" -- degree sign, U+00B0 ISOnum -->
    1.39 +<!ENTITY plusmn CDATA "&#177;" -- plus-minus sign = plus-or-minus sign,
    1.40 +                                  U+00B1 ISOnum -->
    1.41 +<!ENTITY sup2   CDATA "&#178;" -- superscript two = superscript digit two
    1.42 +                                  = squared, U+00B2 ISOnum -->
    1.43 +<!ENTITY sup3   CDATA "&#179;" -- superscript three = superscript digit three
    1.44 +                                  = cubed, U+00B3 ISOnum -->
    1.45 +<!ENTITY acute  CDATA "&#180;" -- acute accent = spacing acute,
    1.46 +                                  U+00B4 ISOdia -->
    1.47 +<!ENTITY micro  CDATA "&#181;" -- micro sign, U+00B5 ISOnum -->
    1.48 +<!ENTITY para   CDATA "&#182;" -- pilcrow sign = paragraph sign,
    1.49 +                                  U+00B6 ISOnum -->
    1.50 +<!ENTITY middot CDATA "&#183;" -- middle dot = Georgian comma
    1.51 +                                  = Greek middle dot, U+00B7 ISOnum -->
    1.52 +<!ENTITY cedil  CDATA "&#184;" -- cedilla = spacing cedilla, U+00B8 ISOdia -->
    1.53 +<!ENTITY sup1   CDATA "&#185;" -- superscript one = superscript digit one,
    1.54 +                                  U+00B9 ISOnum -->
    1.55 +<!ENTITY ordm   CDATA "&#186;" -- masculine ordinal indicator,
    1.56 +                                  U+00BA ISOnum -->
    1.57 +<!ENTITY raquo  CDATA "&#187;" -- right-pointing double angle quotation mark
    1.58 +                                  = right pointing guillemet, U+00BB ISOnum -->
    1.59 +<!ENTITY frac14 CDATA "&#188;" -- vulgar fraction one quarter
    1.60 +                                  = fraction one quarter, U+00BC ISOnum -->
    1.61 +<!ENTITY frac12 CDATA "&#189;" -- vulgar fraction one half
    1.62 +                                  = fraction one half, U+00BD ISOnum -->
    1.63 +<!ENTITY frac34 CDATA "&#190;" -- vulgar fraction three quarters
    1.64 +                                  = fraction three quarters, U+00BE ISOnum -->
    1.65 +<!ENTITY iquest CDATA "&#191;" -- inverted question mark
    1.66 +                                  = turned question mark, U+00BF ISOnum -->
    1.67 +<!ENTITY Agrave CDATA "&#192;" -- latin capital letter A with grave
    1.68 +                                  = latin capital letter A grave,
    1.69 +                                  U+00C0 ISOlat1 -->
    1.70 +<!ENTITY Aacute CDATA "&#193;" -- latin capital letter A with acute,
    1.71 +                                  U+00C1 ISOlat1 -->
    1.72 +<!ENTITY Acirc  CDATA "&#194;" -- latin capital letter A with circumflex,
    1.73 +                                  U+00C2 ISOlat1 -->
    1.74 +<!ENTITY Atilde CDATA "&#195;" -- latin capital letter A with tilde,
    1.75 +                                  U+00C3 ISOlat1 -->
    1.76 +<!ENTITY Auml   CDATA "&#196;" -- latin capital letter A with diaeresis,
    1.77 +                                  U+00C4 ISOlat1 -->
    1.78 +<!ENTITY Aring  CDATA "&#197;" -- latin capital letter A with ring above
    1.79 +                                  = latin capital letter A ring,
    1.80 +                                  U+00C5 ISOlat1 -->
    1.81 +<!ENTITY AElig  CDATA "&#198;" -- latin capital letter AE
    1.82 +                                  = latin capital ligature AE,
    1.83 +                                  U+00C6 ISOlat1 -->
    1.84 +<!ENTITY Ccedil CDATA "&#199;" -- latin capital letter C with cedilla,
    1.85 +                                  U+00C7 ISOlat1 -->
    1.86 +<!ENTITY Egrave CDATA "&#200;" -- latin capital letter E with grave,
    1.87 +                                  U+00C8 ISOlat1 -->
    1.88 +<!ENTITY Eacute CDATA "&#201;" -- latin capital letter E with acute,
    1.89 +                                  U+00C9 ISOlat1 -->
    1.90 +<!ENTITY Ecirc  CDATA "&#202;" -- latin capital letter E with circumflex,
    1.91 +                                  U+00CA ISOlat1 -->
    1.92 +<!ENTITY Euml   CDATA "&#203;" -- latin capital letter E with diaeresis,
    1.93 +                                  U+00CB ISOlat1 -->
    1.94 +<!ENTITY Igrave CDATA "&#204;" -- latin capital letter I with grave,
    1.95 +                                  U+00CC ISOlat1 -->
    1.96 +<!ENTITY Iacute CDATA "&#205;" -- latin capital letter I with acute,
    1.97 +                                  U+00CD ISOlat1 -->
    1.98 +<!ENTITY Icirc  CDATA "&#206;" -- latin capital letter I with circumflex,
    1.99 +                                  U+00CE ISOlat1 -->
   1.100 +<!ENTITY Iuml   CDATA "&#207;" -- latin capital letter I with diaeresis,
   1.101 +                                  U+00CF ISOlat1 -->
   1.102 +<!ENTITY ETH    CDATA "&#208;" -- latin capital letter ETH, U+00D0 ISOlat1 -->
   1.103 +<!ENTITY Ntilde CDATA "&#209;" -- latin capital letter N with tilde,
   1.104 +                                  U+00D1 ISOlat1 -->
   1.105 +<!ENTITY Ograve CDATA "&#210;" -- latin capital letter O with grave,
   1.106 +                                  U+00D2 ISOlat1 -->
   1.107 +<!ENTITY Oacute CDATA "&#211;" -- latin capital letter O with acute,
   1.108 +                                  U+00D3 ISOlat1 -->
   1.109 +<!ENTITY Ocirc  CDATA "&#212;" -- latin capital letter O with circumflex,
   1.110 +                                  U+00D4 ISOlat1 -->
   1.111 +<!ENTITY Otilde CDATA "&#213;" -- latin capital letter O with tilde,
   1.112 +                                  U+00D5 ISOlat1 -->
   1.113 +<!ENTITY Ouml   CDATA "&#214;" -- latin capital letter O with diaeresis,
   1.114 +                                  U+00D6 ISOlat1 -->
   1.115 +<!ENTITY times  CDATA "&#215;" -- multiplication sign, U+00D7 ISOnum -->
   1.116 +<!ENTITY Oslash CDATA "&#216;" -- latin capital letter O with stroke
   1.117 +                                  = latin capital letter O slash,
   1.118 +                                  U+00D8 ISOlat1 -->
   1.119 +<!ENTITY Ugrave CDATA "&#217;" -- latin capital letter U with grave,
   1.120 +                                  U+00D9 ISOlat1 -->
   1.121 +<!ENTITY Uacute CDATA "&#218;" -- latin capital letter U with acute,
   1.122 +                                  U+00DA ISOlat1 -->
   1.123 +<!ENTITY Ucirc  CDATA "&#219;" -- latin capital letter U with circumflex,
   1.124 +                                  U+00DB ISOlat1 -->
   1.125 +<!ENTITY Uuml   CDATA "&#220;" -- latin capital letter U with diaeresis,
   1.126 +                                  U+00DC ISOlat1 -->
   1.127 +<!ENTITY Yacute CDATA "&#221;" -- latin capital letter Y with acute,
   1.128 +                                  U+00DD ISOlat1 -->
   1.129 +<!ENTITY THORN  CDATA "&#222;" -- latin capital letter THORN,
   1.130 +                                  U+00DE ISOlat1 -->
   1.131 +<!ENTITY szlig  CDATA "&#223;" -- latin small letter sharp s = ess-zed,
   1.132 +                                  U+00DF ISOlat1 -->
   1.133 +<!ENTITY agrave CDATA "&#224;" -- latin small letter a with grave
   1.134 +                                  = latin small letter a grave,
   1.135 +                                  U+00E0 ISOlat1 -->
   1.136 +<!ENTITY aacute CDATA "&#225;" -- latin small letter a with acute,
   1.137 +                                  U+00E1 ISOlat1 -->
   1.138 +<!ENTITY acirc  CDATA "&#226;" -- latin small letter a with circumflex,
   1.139 +                                  U+00E2 ISOlat1 -->
   1.140 +<!ENTITY atilde CDATA "&#227;" -- latin small letter a with tilde,
   1.141 +                                  U+00E3 ISOlat1 -->
   1.142 +<!ENTITY auml   CDATA "&#228;" -- latin small letter a with diaeresis,
   1.143 +                                  U+00E4 ISOlat1 -->
   1.144 +<!ENTITY aring  CDATA "&#229;" -- latin small letter a with ring above
   1.145 +                                  = latin small letter a ring,
   1.146 +                                  U+00E5 ISOlat1 -->
   1.147 +<!ENTITY aelig  CDATA "&#230;" -- latin small letter ae
   1.148 +                                  = latin small ligature ae, U+00E6 ISOlat1 -->
   1.149 +<!ENTITY ccedil CDATA "&#231;" -- latin small letter c with cedilla,
   1.150 +                                  U+00E7 ISOlat1 -->
   1.151 +<!ENTITY egrave CDATA "&#232;" -- latin small letter e with grave,
   1.152 +                                  U+00E8 ISOlat1 -->
   1.153 +<!ENTITY eacute CDATA "&#233;" -- latin small letter e with acute,
   1.154 +                                  U+00E9 ISOlat1 -->
   1.155 +<!ENTITY ecirc  CDATA "&#234;" -- latin small letter e with circumflex,
   1.156 +                                  U+00EA ISOlat1 -->
   1.157 +<!ENTITY euml   CDATA "&#235;" -- latin small letter e with diaeresis,
   1.158 +                                  U+00EB ISOlat1 -->
   1.159 +<!ENTITY igrave CDATA "&#236;" -- latin small letter i with grave,
   1.160 +                                  U+00EC ISOlat1 -->
   1.161 +<!ENTITY iacute CDATA "&#237;" -- latin small letter i with acute,
   1.162 +                                  U+00ED ISOlat1 -->
   1.163 +<!ENTITY icirc  CDATA "&#238;" -- latin small letter i with circumflex,
   1.164 +                                  U+00EE ISOlat1 -->
   1.165 +<!ENTITY iuml   CDATA "&#239;" -- latin small letter i with diaeresis,
   1.166 +                                  U+00EF ISOlat1 -->
   1.167 +<!ENTITY eth    CDATA "&#240;" -- latin small letter eth, U+00F0 ISOlat1 -->
   1.168 +<!ENTITY ntilde CDATA "&#241;" -- latin small letter n with tilde,
   1.169 +                                  U+00F1 ISOlat1 -->
   1.170 +<!ENTITY ograve CDATA "&#242;" -- latin small letter o with grave,
   1.171 +                                  U+00F2 ISOlat1 -->
   1.172 +<!ENTITY oacute CDATA "&#243;" -- latin small letter o with acute,
   1.173 +                                  U+00F3 ISOlat1 -->
   1.174 +<!ENTITY ocirc  CDATA "&#244;" -- latin small letter o with circumflex,
   1.175 +                                  U+00F4 ISOlat1 -->
   1.176 +<!ENTITY otilde CDATA "&#245;" -- latin small letter o with tilde,
   1.177 +                                  U+00F5 ISOlat1 -->
   1.178 +<!ENTITY ouml   CDATA "&#246;" -- latin small letter o with diaeresis,
   1.179 +                                  U+00F6 ISOlat1 -->
   1.180 +<!ENTITY divide CDATA "&#247;" -- division sign, U+00F7 ISOnum -->
   1.181 +<!ENTITY oslash CDATA "&#248;" -- latin small letter o with stroke
   1.182 +                                  = latin small letter o slash,
   1.183 +                                  U+00F8 ISOlat1 -->
   1.184 +<!ENTITY ugrave CDATA "&#249;" -- latin small letter u with grave,
   1.185 +                                  U+00F9 ISOlat1 -->
   1.186 +<!ENTITY uacute CDATA "&#250;" -- latin small letter u with acute,
   1.187 +                                  U+00FA ISOlat1 -->
   1.188 +<!ENTITY ucirc  CDATA "&#251;" -- latin small letter u with circumflex,
   1.189 +                                  U+00FB ISOlat1 -->
   1.190 +<!ENTITY uuml   CDATA "&#252;" -- latin small letter u with diaeresis,
   1.191 +                                  U+00FC ISOlat1 -->
   1.192 +<!ENTITY yacute CDATA "&#253;" -- latin small letter y with acute,
   1.193 +                                  U+00FD ISOlat1 -->
   1.194 +<!ENTITY thorn  CDATA "&#254;" -- latin small letter thorn,
   1.195 +                                  U+00FE ISOlat1 -->
   1.196 +<!ENTITY yuml   CDATA "&#255;" -- latin small letter y with diaeresis,
   1.197 +                                  U+00FF ISOlat1 -->
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/bookloupe/HTMLspecial.ent	Thu May 30 17:16:37 2013 +0100
     2.3 @@ -0,0 +1,77 @@
     2.4 +<!-- Special characters for HTML -->
     2.5 +
     2.6 +<!-- Character entity set. Typical invocation:
     2.7 +     <!ENTITY % HTMLspecial PUBLIC
     2.8 +       "-//W3C//ENTITIES Special//EN//HTML">
     2.9 +     %HTMLspecial; -->
    2.10 +
    2.11 +<!-- Portions © International Organization for Standardization 1986:
    2.12 +     Permission to copy in any form is granted for use with
    2.13 +     conforming SGML systems and applications as defined in
    2.14 +     ISO 8879, provided this notice is included in all copies.
    2.15 +-->
    2.16 +
    2.17 +<!-- Relevant ISO entity set is given unless names are newly introduced.
    2.18 +     New names (i.e., not in ISO 8879 list) do not clash with any
    2.19 +     existing ISO 8879 entity names. ISO 10646 character numbers
    2.20 +     are given for each character, in hex. CDATA values are decimal
    2.21 +     conversions of the ISO 10646 values and refer to the document
    2.22 +     character set. Names are ISO 10646 names. 
    2.23 +
    2.24 +-->
    2.25 +
    2.26 +<!-- C0 Controls and Basic Latin -->
    2.27 +<!ENTITY quot    CDATA "&#34;"   -- quotation mark = APL quote,
    2.28 +                                    U+0022 ISOnum -->
    2.29 +<!ENTITY amp     CDATA "&#38;"   -- ampersand, U+0026 ISOnum -->
    2.30 +<!ENTITY lt      CDATA "&#60;"   -- less-than sign, U+003C ISOnum -->
    2.31 +<!ENTITY gt      CDATA "&#62;"   -- greater-than sign, U+003E ISOnum -->
    2.32 +
    2.33 +<!-- Latin Extended-A -->
    2.34 +<!ENTITY OElig   CDATA "&#338;"  -- latin capital ligature OE,
    2.35 +                                    U+0152 ISOlat2 -->
    2.36 +<!ENTITY oelig   CDATA "&#339;"  -- latin small ligature oe, U+0153 ISOlat2 -->
    2.37 +<!-- ligature is a misnomer, this is a separate character in some languages -->
    2.38 +<!ENTITY Scaron  CDATA "&#352;"  -- latin capital letter S with caron,
    2.39 +                                    U+0160 ISOlat2 -->
    2.40 +<!ENTITY scaron  CDATA "&#353;"  -- latin small letter s with caron,
    2.41 +                                    U+0161 ISOlat2 -->
    2.42 +<!ENTITY Yuml    CDATA "&#376;"  -- latin capital letter Y with diaeresis,
    2.43 +                                    U+0178 ISOlat2 -->
    2.44 +
    2.45 +<!-- Spacing Modifier Letters -->
    2.46 +<!ENTITY circ    CDATA "&#710;"  -- modifier letter circumflex accent,
    2.47 +                                    U+02C6 ISOpub -->
    2.48 +<!ENTITY tilde   CDATA "&#732;"  -- small tilde, U+02DC ISOdia -->
    2.49 +
    2.50 +<!-- General Punctuation -->
    2.51 +<!ENTITY ensp    CDATA "&#8194;" -- en space, U+2002 ISOpub -->
    2.52 +<!ENTITY emsp    CDATA "&#8195;" -- em space, U+2003 ISOpub -->
    2.53 +<!ENTITY thinsp  CDATA "&#8201;" -- thin space, U+2009 ISOpub -->
    2.54 +<!ENTITY zwnj    CDATA "&#8204;" -- zero width non-joiner,
    2.55 +                                    U+200C NEW RFC 2070 -->
    2.56 +<!ENTITY zwj     CDATA "&#8205;" -- zero width joiner, U+200D NEW RFC 2070 -->
    2.57 +<!ENTITY lrm     CDATA "&#8206;" -- left-to-right mark, U+200E NEW RFC 2070 -->
    2.58 +<!ENTITY rlm     CDATA "&#8207;" -- right-to-left mark, U+200F NEW RFC 2070 -->
    2.59 +<!ENTITY ndash   CDATA "&#8211;" -- en dash, U+2013 ISOpub -->
    2.60 +<!ENTITY mdash   CDATA "&#8212;" -- em dash, U+2014 ISOpub -->
    2.61 +<!ENTITY lsquo   CDATA "&#8216;" -- left single quotation mark,
    2.62 +                                    U+2018 ISOnum -->
    2.63 +<!ENTITY rsquo   CDATA "&#8217;" -- right single quotation mark,
    2.64 +                                    U+2019 ISOnum -->
    2.65 +<!ENTITY sbquo   CDATA "&#8218;" -- single low-9 quotation mark, U+201A NEW -->
    2.66 +<!ENTITY ldquo   CDATA "&#8220;" -- left double quotation mark,
    2.67 +                                    U+201C ISOnum -->
    2.68 +<!ENTITY rdquo   CDATA "&#8221;" -- right double quotation mark,
    2.69 +                                    U+201D ISOnum -->
    2.70 +<!ENTITY bdquo   CDATA "&#8222;" -- double low-9 quotation mark, U+201E NEW -->
    2.71 +<!ENTITY dagger  CDATA "&#8224;" -- dagger, U+2020 ISOpub -->
    2.72 +<!ENTITY Dagger  CDATA "&#8225;" -- double dagger, U+2021 ISOpub -->
    2.73 +<!ENTITY permil  CDATA "&#8240;" -- per mille sign, U+2030 ISOtech -->
    2.74 +<!ENTITY lsaquo  CDATA "&#8249;" -- single left-pointing angle quotation mark,
    2.75 +                                    U+2039 ISO proposed -->
    2.76 +<!-- lsaquo is proposed but not yet ISO standardized -->
    2.77 +<!ENTITY rsaquo  CDATA "&#8250;" -- single right-pointing angle quotation mark,
    2.78 +                                    U+203A ISO proposed -->
    2.79 +<!-- rsaquo is proposed but not yet ISO standardized -->
    2.80 +<!ENTITY euro   CDATA "&#8364;"  -- euro sign, U+20AC NEW -->
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/bookloupe/HTMLsymbol.ent	Thu May 30 17:16:37 2013 +0100
     3.3 @@ -0,0 +1,241 @@
     3.4 +<!-- Mathematical, Greek and Symbolic characters for HTML -->
     3.5 +
     3.6 +<!-- Character entity set. Typical invocation:
     3.7 +     <!ENTITY % HTMLsymbol PUBLIC
     3.8 +       "-//W3C//ENTITIES Symbols//EN//HTML">
     3.9 +     %HTMLsymbol; -->
    3.10 +
    3.11 +<!-- Portions © International Organization for Standardization 1986:
    3.12 +     Permission to copy in any form is granted for use with
    3.13 +     conforming SGML systems and applications as defined in
    3.14 +     ISO 8879, provided this notice is included in all copies.
    3.15 +-->
    3.16 +
    3.17 +<!-- Relevant ISO entity set is given unless names are newly introduced.
    3.18 +     New names (i.e., not in ISO 8879 list) do not clash with any
    3.19 +     existing ISO 8879 entity names. ISO 10646 character numbers
    3.20 +     are given for each character, in hex. CDATA values are decimal
    3.21 +     conversions of the ISO 10646 values and refer to the document
    3.22 +     character set. Names are ISO 10646 names. 
    3.23 +
    3.24 +-->
    3.25 +
    3.26 +<!-- Latin Extended-B -->
    3.27 +<!ENTITY fnof     CDATA "&#402;" -- latin small f with hook = function
    3.28 +                                    = florin, U+0192 ISOtech -->
    3.29 +
    3.30 +<!-- Greek -->
    3.31 +<!ENTITY Alpha    CDATA "&#913;" -- greek capital letter alpha, U+0391 -->
    3.32 +<!ENTITY Beta     CDATA "&#914;" -- greek capital letter beta, U+0392 -->
    3.33 +<!ENTITY Gamma    CDATA "&#915;" -- greek capital letter gamma,
    3.34 +                                    U+0393 ISOgrk3 -->
    3.35 +<!ENTITY Delta    CDATA "&#916;" -- greek capital letter delta,
    3.36 +                                    U+0394 ISOgrk3 -->
    3.37 +<!ENTITY Epsilon  CDATA "&#917;" -- greek capital letter epsilon, U+0395 -->
    3.38 +<!ENTITY Zeta     CDATA "&#918;" -- greek capital letter zeta, U+0396 -->
    3.39 +<!ENTITY Eta      CDATA "&#919;" -- greek capital letter eta, U+0397 -->
    3.40 +<!ENTITY Theta    CDATA "&#920;" -- greek capital letter theta,
    3.41 +                                    U+0398 ISOgrk3 -->
    3.42 +<!ENTITY Iota     CDATA "&#921;" -- greek capital letter iota, U+0399 -->
    3.43 +<!ENTITY Kappa    CDATA "&#922;" -- greek capital letter kappa, U+039A -->
    3.44 +<!ENTITY Lambda   CDATA "&#923;" -- greek capital letter lambda,
    3.45 +                                    U+039B ISOgrk3 -->
    3.46 +<!ENTITY Mu       CDATA "&#924;" -- greek capital letter mu, U+039C -->
    3.47 +<!ENTITY Nu       CDATA "&#925;" -- greek capital letter nu, U+039D -->
    3.48 +<!ENTITY Xi       CDATA "&#926;" -- greek capital letter xi, U+039E ISOgrk3 -->
    3.49 +<!ENTITY Omicron  CDATA "&#927;" -- greek capital letter omicron, U+039F -->
    3.50 +<!ENTITY Pi       CDATA "&#928;" -- greek capital letter pi, U+03A0 ISOgrk3 -->
    3.51 +<!ENTITY Rho      CDATA "&#929;" -- greek capital letter rho, U+03A1 -->
    3.52 +<!-- there is no Sigmaf, and no U+03A2 character either -->
    3.53 +<!ENTITY Sigma    CDATA "&#931;" -- greek capital letter sigma,
    3.54 +                                    U+03A3 ISOgrk3 -->
    3.55 +<!ENTITY Tau      CDATA "&#932;" -- greek capital letter tau, U+03A4 -->
    3.56 +<!ENTITY Upsilon  CDATA "&#933;" -- greek capital letter upsilon,
    3.57 +                                    U+03A5 ISOgrk3 -->
    3.58 +<!ENTITY Phi      CDATA "&#934;" -- greek capital letter phi,
    3.59 +                                    U+03A6 ISOgrk3 -->
    3.60 +<!ENTITY Chi      CDATA "&#935;" -- greek capital letter chi, U+03A7 -->
    3.61 +<!ENTITY Psi      CDATA "&#936;" -- greek capital letter psi,
    3.62 +                                    U+03A8 ISOgrk3 -->
    3.63 +<!ENTITY Omega    CDATA "&#937;" -- greek capital letter omega,
    3.64 +                                    U+03A9 ISOgrk3 -->
    3.65 +
    3.66 +<!ENTITY alpha    CDATA "&#945;" -- greek small letter alpha,
    3.67 +                                    U+03B1 ISOgrk3 -->
    3.68 +<!ENTITY beta     CDATA "&#946;" -- greek small letter beta, U+03B2 ISOgrk3 -->
    3.69 +<!ENTITY gamma    CDATA "&#947;" -- greek small letter gamma,
    3.70 +                                    U+03B3 ISOgrk3 -->
    3.71 +<!ENTITY delta    CDATA "&#948;" -- greek small letter delta,
    3.72 +                                    U+03B4 ISOgrk3 -->
    3.73 +<!ENTITY epsilon  CDATA "&#949;" -- greek small letter epsilon,
    3.74 +                                    U+03B5 ISOgrk3 -->
    3.75 +<!ENTITY zeta     CDATA "&#950;" -- greek small letter zeta, U+03B6 ISOgrk3 -->
    3.76 +<!ENTITY eta      CDATA "&#951;" -- greek small letter eta, U+03B7 ISOgrk3 -->
    3.77 +<!ENTITY theta    CDATA "&#952;" -- greek small letter theta,
    3.78 +                                    U+03B8 ISOgrk3 -->
    3.79 +<!ENTITY iota     CDATA "&#953;" -- greek small letter iota, U+03B9 ISOgrk3 -->
    3.80 +<!ENTITY kappa    CDATA "&#954;" -- greek small letter kappa,
    3.81 +                                    U+03BA ISOgrk3 -->
    3.82 +<!ENTITY lambda   CDATA "&#955;" -- greek small letter lambda,
    3.83 +                                    U+03BB ISOgrk3 -->
    3.84 +<!ENTITY mu       CDATA "&#956;" -- greek small letter mu, U+03BC ISOgrk3 -->
    3.85 +<!ENTITY nu       CDATA "&#957;" -- greek small letter nu, U+03BD ISOgrk3 -->
    3.86 +<!ENTITY xi       CDATA "&#958;" -- greek small letter xi, U+03BE ISOgrk3 -->
    3.87 +<!ENTITY omicron  CDATA "&#959;" -- greek small letter omicron, U+03BF NEW -->
    3.88 +<!ENTITY pi       CDATA "&#960;" -- greek small letter pi, U+03C0 ISOgrk3 -->
    3.89 +<!ENTITY rho      CDATA "&#961;" -- greek small letter rho, U+03C1 ISOgrk3 -->
    3.90 +<!ENTITY sigmaf   CDATA "&#962;" -- greek small letter final sigma,
    3.91 +                                    U+03C2 ISOgrk3 -->
    3.92 +<!ENTITY sigma    CDATA "&#963;" -- greek small letter sigma,
    3.93 +                                    U+03C3 ISOgrk3 -->
    3.94 +<!ENTITY tau      CDATA "&#964;" -- greek small letter tau, U+03C4 ISOgrk3 -->
    3.95 +<!ENTITY upsilon  CDATA "&#965;" -- greek small letter upsilon,
    3.96 +                                    U+03C5 ISOgrk3 -->
    3.97 +<!ENTITY phi      CDATA "&#966;" -- greek small letter phi, U+03C6 ISOgrk3 -->
    3.98 +<!ENTITY chi      CDATA "&#967;" -- greek small letter chi, U+03C7 ISOgrk3 -->
    3.99 +<!ENTITY psi      CDATA "&#968;" -- greek small letter psi, U+03C8 ISOgrk3 -->
   3.100 +<!ENTITY omega    CDATA "&#969;" -- greek small letter omega,
   3.101 +                                    U+03C9 ISOgrk3 -->
   3.102 +<!ENTITY thetasym CDATA "&#977;" -- greek small letter theta symbol,
   3.103 +                                    U+03D1 NEW -->
   3.104 +<!ENTITY upsih    CDATA "&#978;" -- greek upsilon with hook symbol,
   3.105 +                                    U+03D2 NEW -->
   3.106 +<!ENTITY piv      CDATA "&#982;" -- greek pi symbol, U+03D6 ISOgrk3 -->
   3.107 +
   3.108 +<!-- General Punctuation -->
   3.109 +<!ENTITY bull     CDATA "&#8226;" -- bullet = black small circle,
   3.110 +                                     U+2022 ISOpub  -->
   3.111 +<!-- bullet is NOT the same as bullet operator, U+2219 -->
   3.112 +<!ENTITY hellip   CDATA "&#8230;" -- horizontal ellipsis = three dot leader,
   3.113 +                                     U+2026 ISOpub  -->
   3.114 +<!ENTITY prime    CDATA "&#8242;" -- prime = minutes = feet, U+2032 ISOtech -->
   3.115 +<!ENTITY Prime    CDATA "&#8243;" -- double prime = seconds = inches,
   3.116 +                                     U+2033 ISOtech -->
   3.117 +<!ENTITY oline    CDATA "&#8254;" -- overline = spacing overscore,
   3.118 +                                     U+203E NEW -->
   3.119 +<!ENTITY frasl    CDATA "&#8260;" -- fraction slash, U+2044 NEW -->
   3.120 +
   3.121 +<!-- Letterlike Symbols -->
   3.122 +<!ENTITY weierp   CDATA "&#8472;" -- script capital P = power set
   3.123 +                                     = Weierstrass p, U+2118 ISOamso -->
   3.124 +<!ENTITY image    CDATA "&#8465;" -- blackletter capital I = imaginary part,
   3.125 +                                     U+2111 ISOamso -->
   3.126 +<!ENTITY real     CDATA "&#8476;" -- blackletter capital R = real part symbol,
   3.127 +                                     U+211C ISOamso -->
   3.128 +<!ENTITY trade    CDATA "&#8482;" -- trade mark sign, U+2122 ISOnum -->
   3.129 +<!ENTITY alefsym  CDATA "&#8501;" -- alef symbol = first transfinite cardinal,
   3.130 +                                     U+2135 NEW -->
   3.131 +<!-- alef symbol is NOT the same as hebrew letter alef,
   3.132 +     U+05D0 although the same glyph could be used to depict both characters -->
   3.133 +
   3.134 +<!-- Arrows -->
   3.135 +<!ENTITY larr     CDATA "&#8592;" -- leftwards arrow, U+2190 ISOnum -->
   3.136 +<!ENTITY uarr     CDATA "&#8593;" -- upwards arrow, U+2191 ISOnum-->
   3.137 +<!ENTITY rarr     CDATA "&#8594;" -- rightwards arrow, U+2192 ISOnum -->
   3.138 +<!ENTITY darr     CDATA "&#8595;" -- downwards arrow, U+2193 ISOnum -->
   3.139 +<!ENTITY harr     CDATA "&#8596;" -- left right arrow, U+2194 ISOamsa -->
   3.140 +<!ENTITY crarr    CDATA "&#8629;" -- downwards arrow with corner leftwards
   3.141 +                                     = carriage return, U+21B5 NEW -->
   3.142 +<!ENTITY lArr     CDATA "&#8656;" -- leftwards double arrow, U+21D0 ISOtech -->
   3.143 +<!-- ISO 10646 does not say that lArr is the same as the 'is implied by' arrow
   3.144 +    but also does not have any other character for that function. So ? lArr can
   3.145 +    be used for 'is implied by' as ISOtech suggests -->
   3.146 +<!ENTITY uArr     CDATA "&#8657;" -- upwards double arrow, U+21D1 ISOamsa -->
   3.147 +<!ENTITY rArr     CDATA "&#8658;" -- rightwards double arrow,
   3.148 +                                     U+21D2 ISOtech -->
   3.149 +<!-- ISO 10646 does not say this is the 'implies' character but does not have 
   3.150 +     another character with this function so ?
   3.151 +     rArr can be used for 'implies' as ISOtech suggests -->
   3.152 +<!ENTITY dArr     CDATA "&#8659;" -- downwards double arrow, U+21D3 ISOamsa -->
   3.153 +<!ENTITY hArr     CDATA "&#8660;" -- left right double arrow,
   3.154 +                                     U+21D4 ISOamsa -->
   3.155 +
   3.156 +<!-- Mathematical Operators -->
   3.157 +<!ENTITY forall   CDATA "&#8704;" -- for all, U+2200 ISOtech -->
   3.158 +<!ENTITY part     CDATA "&#8706;" -- partial differential, U+2202 ISOtech  -->
   3.159 +<!ENTITY exist    CDATA "&#8707;" -- there exists, U+2203 ISOtech -->
   3.160 +<!ENTITY empty    CDATA "&#8709;" -- empty set = null set = diameter,
   3.161 +                                     U+2205 ISOamso -->
   3.162 +<!ENTITY nabla    CDATA "&#8711;" -- nabla = backward difference,
   3.163 +                                     U+2207 ISOtech -->
   3.164 +<!ENTITY isin     CDATA "&#8712;" -- element of, U+2208 ISOtech -->
   3.165 +<!ENTITY notin    CDATA "&#8713;" -- not an element of, U+2209 ISOtech -->
   3.166 +<!ENTITY ni       CDATA "&#8715;" -- contains as member, U+220B ISOtech -->
   3.167 +<!-- should there be a more memorable name than 'ni'? -->
   3.168 +<!ENTITY prod     CDATA "&#8719;" -- n-ary product = product sign,
   3.169 +                                     U+220F ISOamsb -->
   3.170 +<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though
   3.171 +     the same glyph might be used for both -->
   3.172 +<!ENTITY sum      CDATA "&#8721;" -- n-ary summation, U+2211 ISOamsb -->
   3.173 +<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
   3.174 +     though the same glyph might be used for both -->
   3.175 +<!ENTITY minus    CDATA "&#8722;" -- minus sign, U+2212 ISOtech -->
   3.176 +<!ENTITY lowast   CDATA "&#8727;" -- asterisk operator, U+2217 ISOtech -->
   3.177 +<!ENTITY radic    CDATA "&#8730;" -- square root = radical sign,
   3.178 +                                     U+221A ISOtech -->
   3.179 +<!ENTITY prop     CDATA "&#8733;" -- proportional to, U+221D ISOtech -->
   3.180 +<!ENTITY infin    CDATA "&#8734;" -- infinity, U+221E ISOtech -->
   3.181 +<!ENTITY ang      CDATA "&#8736;" -- angle, U+2220 ISOamso -->
   3.182 +<!ENTITY and      CDATA "&#8743;" -- logical and = wedge, U+2227 ISOtech -->
   3.183 +<!ENTITY or       CDATA "&#8744;" -- logical or = vee, U+2228 ISOtech -->
   3.184 +<!ENTITY cap      CDATA "&#8745;" -- intersection = cap, U+2229 ISOtech -->
   3.185 +<!ENTITY cup      CDATA "&#8746;" -- union = cup, U+222A ISOtech -->
   3.186 +<!ENTITY int      CDATA "&#8747;" -- integral, U+222B ISOtech -->
   3.187 +<!ENTITY there4   CDATA "&#8756;" -- therefore, U+2234 ISOtech -->
   3.188 +<!ENTITY sim      CDATA "&#8764;" -- tilde operator = varies with = similar to,
   3.189 +                                     U+223C ISOtech -->
   3.190 +<!-- tilde operator is NOT the same character as the tilde, U+007E,
   3.191 +     although the same glyph might be used to represent both  -->
   3.192 +<!ENTITY cong     CDATA "&#8773;" -- approximately equal to, U+2245 ISOtech -->
   3.193 +<!ENTITY asymp    CDATA "&#8776;" -- almost equal to = asymptotic to,
   3.194 +                                     U+2248 ISOamsr -->
   3.195 +<!ENTITY ne       CDATA "&#8800;" -- not equal to, U+2260 ISOtech -->
   3.196 +<!ENTITY equiv    CDATA "&#8801;" -- identical to, U+2261 ISOtech -->
   3.197 +<!ENTITY le       CDATA "&#8804;" -- less-than or equal to, U+2264 ISOtech -->
   3.198 +<!ENTITY ge       CDATA "&#8805;" -- greater-than or equal to,
   3.199 +                                     U+2265 ISOtech -->
   3.200 +<!ENTITY sub      CDATA "&#8834;" -- subset of, U+2282 ISOtech -->
   3.201 +<!ENTITY sup      CDATA "&#8835;" -- superset of, U+2283 ISOtech -->
   3.202 +<!-- note that nsup, 'not a superset of, U+2285' is not covered by the Symbol
   3.203 +     font encoding and is not included. Should it be, for symmetry?
   3.204 +     It is in ISOamsn  --> 
   3.205 +<!ENTITY nsub     CDATA "&#8836;" -- not a subset of, U+2284 ISOamsn -->
   3.206 +<!ENTITY sube     CDATA "&#8838;" -- subset of or equal to, U+2286 ISOtech -->
   3.207 +<!ENTITY supe     CDATA "&#8839;" -- superset of or equal to,
   3.208 +                                     U+2287 ISOtech -->
   3.209 +<!ENTITY oplus    CDATA "&#8853;" -- circled plus = direct sum,
   3.210 +                                     U+2295 ISOamsb -->
   3.211 +<!ENTITY otimes   CDATA "&#8855;" -- circled times = vector product,
   3.212 +                                     U+2297 ISOamsb -->
   3.213 +<!ENTITY perp     CDATA "&#8869;" -- up tack = orthogonal to = perpendicular,
   3.214 +                                     U+22A5 ISOtech -->
   3.215 +<!ENTITY sdot     CDATA "&#8901;" -- dot operator, U+22C5 ISOamsb -->
   3.216 +<!-- dot operator is NOT the same character as U+00B7 middle dot -->
   3.217 +
   3.218 +<!-- Miscellaneous Technical -->
   3.219 +<!ENTITY lceil    CDATA "&#8968;" -- left ceiling = apl upstile,
   3.220 +                                     U+2308 ISOamsc  -->
   3.221 +<!ENTITY rceil    CDATA "&#8969;" -- right ceiling, U+2309 ISOamsc  -->
   3.222 +<!ENTITY lfloor   CDATA "&#8970;" -- left floor = apl downstile,
   3.223 +                                     U+230A ISOamsc  -->
   3.224 +<!ENTITY rfloor   CDATA "&#8971;" -- right floor, U+230B ISOamsc  -->
   3.225 +<!ENTITY lang     CDATA "&#9001;" -- left-pointing angle bracket = bra,
   3.226 +                                     U+2329 ISOtech -->
   3.227 +<!-- lang is NOT the same character as U+003C 'less than' 
   3.228 +     or U+2039 'single left-pointing angle quotation mark' -->
   3.229 +<!ENTITY rang     CDATA "&#9002;" -- right-pointing angle bracket = ket,
   3.230 +                                     U+232A ISOtech -->
   3.231 +<!-- rang is NOT the same character as U+003E 'greater than' 
   3.232 +     or U+203A 'single right-pointing angle quotation mark' -->
   3.233 +
   3.234 +<!-- Geometric Shapes -->
   3.235 +<!ENTITY loz      CDATA "&#9674;" -- lozenge, U+25CA ISOpub -->
   3.236 +
   3.237 +<!-- Miscellaneous Symbols -->
   3.238 +<!ENTITY spades   CDATA "&#9824;" -- black spade suit, U+2660 ISOpub -->
   3.239 +<!-- black here seems to mean filled as opposed to hollow -->
   3.240 +<!ENTITY clubs    CDATA "&#9827;" -- black club suit = shamrock,
   3.241 +                                     U+2663 ISOpub -->
   3.242 +<!ENTITY hearts   CDATA "&#9829;" -- black heart suit = valentine,
   3.243 +                                     U+2665 ISOpub -->
   3.244 +<!ENTITY diams    CDATA "&#9830;" -- black diamond suit, U+2666 ISOpub -->
     4.1 --- a/bookloupe/Makefile.am	Thu May 30 07:31:24 2013 +0100
     4.2 +++ b/bookloupe/Makefile.am	Thu May 30 17:16:37 2013 +0100
     4.3 @@ -8,5 +8,12 @@
     4.4  bookloupe.typ:	bookloupe.typ.in
     4.5  	sed 's/$$/\r/' $< > $@
     4.6  
     4.7 -EXTRA_DIST=bookloupe.typ.in
     4.8 -CLEANFILES=bookloupe.typ
     4.9 +HTMLentities.h:	gen-html-entities.sh HTMLlat1.ent HTMLsymbol.ent HTMLspecial.ent
    4.10 +	sh ${srcdir}/gen-html-entities.sh ${srcdir}/HTMLlat1.ent \
    4.11 +	  ${srcdir}/HTMLsymbol.ent ${srcdir}/HTMLspecial.ent > $@
    4.12 +
    4.13 +bookloupe.$(OBJEXT): HTMLentities.h
    4.14 +
    4.15 +EXTRA_DIST=bookloupe.typ.in gen-html-entities.sh HTMLlat1.ent HTMLsymbol.ent \
    4.16 +	HTMLspecial.ent
    4.17 +CLEANFILES=bookloupe.typ HTMLentities.h
     5.1 --- a/bookloupe/bookloupe.c	Thu May 30 07:31:24 2013 +0100
     5.2 +++ b/bookloupe/bookloupe.c	Thu May 30 17:16:37 2013 +0100
     5.3 @@ -24,6 +24,7 @@
     5.4  #include <ctype.h>
     5.5  #include <glib.h>
     5.6  #include <bl/bl.h>
     5.7 +#include "HTMLentities.h"
     5.8  
     5.9  gchar *prevline;
    5.10  
    5.11 @@ -119,132 +120,6 @@
    5.12      "among", "those", "into", "whom", "having", "thence", ""
    5.13  }; 
    5.14  
    5.15 -struct {
    5.16 -    char *htmlent;
    5.17 -    char *htmlnum;
    5.18 -    char *textent;
    5.19 -} entities[] = {
    5.20 -    "&amp;",	"&#38;",     "&", 
    5.21 -    "&lt;",	"&#60;",     "<",
    5.22 -    "&gt;",	"&#62;",     ">",
    5.23 -    "&deg;",	"&#176;",    " degrees",
    5.24 -    "&pound;",	"&#163;",    "L",
    5.25 -    "&quot;",	"&#34;",     "\"", /* quotation mark = APL quote */
    5.26 -    "&OElig;",	"&#338;",    "OE", /* latin capital ligature OE */
    5.27 -    "&oelig;",	"&#339;",    "oe", /* latin small ligature oe */
    5.28 -    "&Scaron;",	"&#352;",    "S", /* latin capital letter S with caron */
    5.29 -    "&scaron;",	"&#353;",    "s", /* latin small letter s with caron */
    5.30 -    "&Yuml;",	"&#376;",    "Y", /* latin capital letter Y with diaeresis */
    5.31 -    "&circ;",	"&#710;",    "",  /* modifier letter circumflex accent */
    5.32 -    "&tilde;",	"&#732;",    "~", /* small tilde, U+02DC ISOdia */
    5.33 -    "&ensp;",	"&#8194;",   " ", /* en space, U+2002 ISOpub */
    5.34 -    "&emsp;",	"&#8195;",   " ", /* em space, U+2003 ISOpub */
    5.35 -    "&thinsp;",	"&#8201;",   " ", /* thin space, U+2009 ISOpub */
    5.36 -    "&ndash;",	"&#8211;",   "-", /* en dash, U+2013 ISOpub */
    5.37 -    "&mdash;",	"&#8212;",   "--", /* em dash, U+2014 ISOpub */
    5.38 -    "&rsquo;",	"&#8217;",   "'", /* right single quotation mark */
    5.39 -    "&sbquo;",	"&#8218;",   "'", /* single low-9 quotation mark */
    5.40 -    "&ldquo;",	"&#8220;",   "\"", /* left double quotation mark */
    5.41 -    "&rdquo;",	"&#8221;",   "\"", /* right double quotation mark */
    5.42 -    "&bdquo;",	"&#8222;",   "\"", /* double low-9 quotation mark */
    5.43 -    "&lsaquo;",	"&#8249;",   "\"", /* single left-pointing angle quotation mark */
    5.44 -    "&rsaquo;",	"&#8250;",   "\"", /* single right-pointing angle quotation mark */
    5.45 -    "&nbsp;",	"&#160;",    " ", /* no-break space = non-breaking space, */
    5.46 -    "&iexcl;",	"&#161;",    "!", /* inverted exclamation mark */
    5.47 -    "&cent;",	"&#162;",    "c", /* cent sign */
    5.48 -    "&pound;",	"&#163;",    "L", /* pound sign */
    5.49 -    "&curren;",	"&#164;",    "$", /* currency sign */
    5.50 -    "&yen;",	"&#165;",    "Y", /* yen sign = yuan sign */
    5.51 -    "&sect;",	"&#167;",    "--", /* section sign */
    5.52 -    "&uml;",	"&#168;",    " ", /* diaeresis = spacing diaeresis */
    5.53 -    "&copy;",	"&#169;",    "(C) ", /* copyright sign */
    5.54 -    "&ordf;",	"&#170;",    " ", /* feminine ordinal indicator */
    5.55 -    "&laquo;",	"&#171;",    "\"", /* left-pointing double angle quotation mark */
    5.56 -    "&shy;",	"&#173;",    "-", /* soft hyphen = discretionary hyphen */
    5.57 -    "&reg;",	"&#174;",    "(R) ", /* registered sign = registered trade mark sign */
    5.58 -    "&macr;",	"&#175;",    " ", /* macron = spacing macron = overline */
    5.59 -    "&deg;",	"&#176;",    " degrees", /* degree sign */
    5.60 -    "&plusmn;",	"&#177;",    "+-", /* plus-minus sign = plus-or-minus sign */
    5.61 -    "&sup2;",	"&#178;",    "2", /* superscript two = superscript digit two */
    5.62 -    "&sup3;",	"&#179;",    "3", /* superscript three = superscript digit three */
    5.63 -    "&acute;",	"&#180;",    " ", /* acute accent = spacing acute */
    5.64 -    "&micro;",	"&#181;",    "m", /* micro sign */
    5.65 -    "&para;",	"&#182;",    "--", /* pilcrow sign = paragraph sign */
    5.66 -    "&cedil;",	"&#184;",    " ", /* cedilla = spacing cedilla */
    5.67 -    "&sup1;",	"&#185;",    "1", /* superscript one = superscript digit one */
    5.68 -    "&ordm;",	"&#186;",    " ", /* masculine ordinal indicator */
    5.69 -    "&raquo;",	"&#187;",    "\"", /* right-pointing double angle quotation mark */
    5.70 -    "&frac14;",	"&#188;",    "1/4", /* vulgar fraction one quarter */
    5.71 -    "&frac12;",	"&#189;",    "1/2", /* vulgar fraction one half */
    5.72 -    "&frac34;",	"&#190;",    "3/4", /* vulgar fraction three quarters */
    5.73 -    "&iquest;",	"&#191;",    "?", /* inverted question mark */
    5.74 -    "&Agrave;",	"&#192;",    "A", /* latin capital letter A with grave */
    5.75 -    "&Aacute;",	"&#193;",    "A", /* latin capital letter A with acute */
    5.76 -    "&Acirc;",	"&#194;",    "A", /* latin capital letter A with circumflex */
    5.77 -    "&Atilde;",	"&#195;",    "A", /* latin capital letter A with tilde */
    5.78 -    "&Auml;",	"&#196;",    "A", /* latin capital letter A with diaeresis */
    5.79 -    "&Aring;",	"&#197;",    "A", /* latin capital letter A with ring above */
    5.80 -    "&AElig;",	"&#198;",    "AE", /* latin capital letter AE */
    5.81 -    "&Ccedil;",	"&#199;",    "C", /* latin capital letter C with cedilla */
    5.82 -    "&Egrave;",	"&#200;",    "E", /* latin capital letter E with grave */
    5.83 -    "&Eacute;",	"&#201;",    "E", /* latin capital letter E with acute */
    5.84 -    "&Ecirc;",	"&#202;",    "E", /* latin capital letter E with circumflex */
    5.85 -    "&Euml;",	"&#203;",    "E", /* latin capital letter E with diaeresis */
    5.86 -    "&Igrave;",	"&#204;",    "I", /* latin capital letter I with grave */
    5.87 -    "&Iacute;",	"&#205;",    "I", /* latin capital letter I with acute */
    5.88 -    "&Icirc;",	"&#206;",    "I", /* latin capital letter I with circumflex */
    5.89 -    "&Iuml;",	"&#207;",    "I", /* latin capital letter I with diaeresis */
    5.90 -    "&ETH;",	"&#208;",    "E", /* latin capital letter ETH */
    5.91 -    "&Ntilde;",	"&#209;",    "N", /* latin capital letter N with tilde */
    5.92 -    "&Ograve;",	"&#210;",    "O", /* latin capital letter O with grave */
    5.93 -    "&Oacute;",	"&#211;",    "O", /* latin capital letter O with acute */
    5.94 -    "&Ocirc;",	"&#212;",    "O", /* latin capital letter O with circumflex */
    5.95 -    "&Otilde;",	"&#213;",    "O", /* latin capital letter O with tilde */
    5.96 -    "&Ouml;",	"&#214;",    "O", /* latin capital letter O with diaeresis */
    5.97 -    "&times;",	"&#215;",    "*", /* multiplication sign */
    5.98 -    "&Oslash;",	"&#216;",    "O", /* latin capital letter O with stroke */
    5.99 -    "&Ugrave;",	"&#217;",    "U", /* latin capital letter U with grave */
   5.100 -    "&Uacute;",	"&#218;",    "U", /* latin capital letter U with acute */
   5.101 -    "&Ucirc;",	"&#219;",    "U", /* latin capital letter U with circumflex */
   5.102 -    "&Uuml;",	"&#220;",    "U", /* latin capital letter U with diaeresis */
   5.103 -    "&Yacute;",	"&#221;",    "Y", /* latin capital letter Y with acute */
   5.104 -    "&THORN;",	"&#222;",    "TH", /* latin capital letter THORN */
   5.105 -    "&szlig;",	"&#223;",    "sz", /* latin small letter sharp s = ess-zed */
   5.106 -    "&agrave;",	"&#224;",    "a", /* latin small letter a with grave */
   5.107 -    "&aacute;",	"&#225;",    "a", /* latin small letter a with acute */
   5.108 -    "&acirc;",	"&#226;",    "a", /* latin small letter a with circumflex */
   5.109 -    "&atilde;",	"&#227;",    "a", /* latin small letter a with tilde */
   5.110 -    "&auml;",	"&#228;",    "a", /* latin small letter a with diaeresis */
   5.111 -    "&aring;",	"&#229;",    "a", /* latin small letter a with ring above */
   5.112 -    "&aelig;",	"&#230;",    "ae", /* latin small letter ae */
   5.113 -    "&ccedil;",	"&#231;",    "c", /* latin small letter c with cedilla */
   5.114 -    "&egrave;",	"&#232;",    "e", /* latin small letter e with grave */
   5.115 -    "&eacute;",	"&#233;",    "e", /* latin small letter e with acute */
   5.116 -    "&ecirc;",	"&#234;",    "e", /* latin small letter e with circumflex */
   5.117 -    "&euml;",	"&#235;",    "e", /* latin small letter e with diaeresis */
   5.118 -    "&igrave;",	"&#236;",    "i", /* latin small letter i with grave */
   5.119 -    "&iacute;",	"&#237;",    "i", /* latin small letter i with acute */
   5.120 -    "&icirc;",	"&#238;",    "i", /* latin small letter i with circumflex */
   5.121 -    "&iuml;",	"&#239;",    "i", /* latin small letter i with diaeresis */
   5.122 -    "&eth;",	"&#240;",    "eth", /* latin small letter eth */
   5.123 -    "&ntilde;",	"&#241;",    "n", /* latin small letter n with tilde */
   5.124 -    "&ograve;",	"&#242;",    "o", /* latin small letter o with grave */
   5.125 -    "&oacute;",	"&#243;",    "o", /* latin small letter o with acute */
   5.126 -    "&ocirc;",	"&#244;",    "o", /* latin small letter o with circumflex */
   5.127 -    "&otilde;",	"&#245;",    "o", /* latin small letter o with tilde */
   5.128 -    "&ouml;",	"&#246;",    "o", /* latin small letter o with diaeresis */
   5.129 -    "&divide;",	"&#247;",    "/", /* division sign */
   5.130 -    "&oslash;",	"&#248;",    "o", /* latin small letter o with stroke */
   5.131 -    "&ugrave;",	"&#249;",    "u", /* latin small letter u with grave */
   5.132 -    "&uacute;",	"&#250;",    "u", /* latin small letter u with acute */
   5.133 -    "&ucirc;",	"&#251;",    "u", /* latin small letter u with circumflex */
   5.134 -    "&uuml;",	"&#252;",    "u", /* latin small letter u with diaeresis */
   5.135 -    "&yacute;",	"&#253;",    "y", /* latin small letter y with acute */
   5.136 -    "&thorn;",	"&#254;",    "th", /* latin small letter thorn */
   5.137 -    "&yuml;",	"&#255;",    "y", /* latin small letter y with diaeresis */
   5.138 -    "", ""
   5.139 -};
   5.140 -
   5.141  /* special characters */
   5.142  #define CHAR_SPACE	  32
   5.143  #define CHAR_TAB	   9
   5.144 @@ -352,7 +227,7 @@
   5.145  char *linehasmarkup(char *);
   5.146  char *losemarkup(char *);
   5.147  gboolean tagcomp(const char *,const char *);
   5.148 -char *loseentities(char *);
   5.149 +void loseentities(char *);
   5.150  gboolean isroman(const char *);
   5.151  void postprocess_for_DP(char *);
   5.152  
   5.153 @@ -2916,6 +2791,8 @@
   5.154      g_tree_unref(qperiod);
   5.155      g_set_print_handler(NULL);
   5.156      print_as_windows_1252(NULL);
   5.157 +    if (pswit[MARKUP_SWITCH])  
   5.158 +	loseentities(NULL);
   5.159  }
   5.160  
   5.161  /*
   5.162 @@ -3210,8 +3087,7 @@
   5.163  {
   5.164      while (losemarkup(theline))
   5.165  	;
   5.166 -    while (loseentities(theline))
   5.167 -	;
   5.168 +    loseentities(theline);
   5.169  }
   5.170  
   5.171  char *losemarkup(char *theline)
   5.172 @@ -3233,37 +3109,86 @@
   5.173      return NULL;
   5.174  }
   5.175  
   5.176 -char *loseentities(char *theline)
   5.177 +void loseentities(char *theline) /* decode entities in place; NULL frees cached state */
   5.178  {
   5.179      int i;
   5.180 -    char *s,*t;
   5.181 -    if (!*theline) 
   5.182 -	return NULL;
   5.183 -    for (i=0;*entities[i].htmlent;i++)
   5.184 +    gsize nb;
   5.185 +    char *amp,*scolon;
   5.186 +    gchar *s,*t;
   5.187 +    gunichar c;
   5.188 +    static GTree *entities=NULL;	/* name -> code point; static so it is built once */
   5.189 +    static GIConv translit=(GIConv)-1,to_utf8=(GIConv)-1; /* static: opened once */
   5.190 +    if (!theline)	/* cleanup request: release everything cached above */
   5.191      {
   5.192 -	s=strstr(theline,entities[i].htmlent);
   5.193 -	if (s)
   5.194 +	if (entities)
   5.195 +	    g_tree_destroy(entities);
   5.196 +	entities=NULL;
   5.197 +	if (translit!=(GIConv)-1)	/* close only a handle that was opened */
   5.198 +	    g_iconv_close(translit);
   5.199 +	translit=(GIConv)-1;
   5.200 +	if (to_utf8!=(GIConv)-1)
   5.201 +	    g_iconv_close(to_utf8);
   5.202 +	to_utf8=(GIConv)-1;
   5.203 +	return;
   5.204 +    }
   5.205 +    if (!*theline)
   5.206 +	return;
   5.207 +    if (!entities)
   5.208 +    {
   5.209 +	entities=g_tree_new((GCompareFunc)strcmp);
   5.210 +	for(i=0;i<G_N_ELEMENTS(HTMLentities);i++)
   5.211 +	    g_tree_insert(entities,HTMLentities[i].name,
   5.212 +	      GUINT_TO_POINTER(HTMLentities[i].c));
   5.213 +    }
   5.214 +    if (translit==(GIConv)-1)
   5.215 +	translit=g_iconv_open("ISO_8859-1//TRANSLIT","UTF-8");
   5.216 +    if (to_utf8==(GIConv)-1)
   5.217 +	to_utf8=g_iconv_open("UTF-8","ISO_8859-1");
   5.218 +    while((amp=strchr(theline,'&')))
   5.219      {
   5.220 +	c=0;	/* a malformed reference must not reuse a stale code point */
   5.221 +	if ((scolon=strchr(amp,';')))
   5.222  	{
   5.223 -	    t=g_strdup(s+strlen(entities[i].htmlent));
   5.224 -	    strcpy(s,entities[i].textent);
   5.225 -	    strcat(s,t);
   5.226 -	    g_free(t);
   5.227 -	    return theline;
   5.228 +	    if (amp[1]=='#')
   5.229 +	    {
   5.230 +		if (amp+2+strspn(amp+2,"0123456789")==scolon)
   5.231 +		    c=strtol(amp+2,NULL,10);
   5.232 +		else if (amp[2]=='x' &&
   5.233 +		  amp+3+strspn(amp+3,"0123456789abcdefABCDEF")==scolon)
   5.234 +		    c=strtol(amp+3,NULL,16);
   5.235 +	    }
   5.236 +	    else
   5.237 +	    {
   5.238 +		s=g_strndup(amp+1,scolon-(amp+1));
   5.239 +	        c=GPOINTER_TO_UINT(g_tree_lookup(entities,s));
   5.240 +		g_free(s);
   5.241 +	    }
   5.242  	}
   5.243 +	else
   5.244 +	    c=0;	/* unterminated '&': not an entity */
   5.245 +	if (c && g_unichar_validate(c))	/* skip references to invalid code points */
   5.246 +	{
   5.247 +	    theline=amp;
   5.248 +	    if (c<128 || c>=192 && c<=255)	/* An ISO-8859-1 character */
   5.249 +		theline+=g_unichar_to_utf8(c,theline);
   5.250 +	    else
   5.251 +	    {
   5.252 +		s=g_malloc(6);
   5.253 +		nb=g_unichar_to_utf8(c,s);
   5.254 +		t=g_convert_with_iconv(s,nb,translit,NULL,&nb,NULL);
   5.255 +		g_free(s);
   5.256 +		s=g_convert_with_iconv(t,nb,to_utf8,NULL,&nb,NULL);
   5.257 +		g_free(t);
   5.258 +		if (s) memcpy(theline,s,nb); else nb=0; /* failed conversion: drop entity, no NULL copy */
   5.259 +		g_free(s);
   5.260 +		theline+=nb;
   5.261 +	    }
   5.262 +	    memmove(theline,g_utf8_next_char(scolon),
   5.263 +	      strlen(g_utf8_next_char(scolon))+1);
   5.264 +	}
   5.265 +	else
   5.266 +	    theline=g_utf8_next_char(amp);
   5.267      }
   5.268 -    for (i=0;*entities[i].htmlnum;i++)
   5.269 -    {
   5.270 -	s=strstr(theline,entities[i].htmlnum);
   5.271 -	if (s)
   5.272 -	{
   5.273 -	    t=g_strdup(s+strlen(entities[i].htmlnum));
   5.274 -	    strcpy(s,entities[i].textent);
   5.275 -	    strcat(s,t);
   5.276 -	    g_free(t);
   5.277 -	    return theline;
   5.278 -	}
   5.279 -    }
   5.280 -    return NULL;
   5.281  }
   5.282  
   5.283  gboolean tagcomp(const char *strin,const char *basetag)
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/bookloupe/gen-html-entities.sh	Thu May 30 17:16:37 2013 +0100
     6.3 @@ -0,0 +1,36 @@
     6.4 +#!/bin/sh
     6.5 +
     6.6 +header()	# write the fixed opening of the generated C table to stdout
     6.7 +{
     6.8 +cat << EOF
     6.9 +/*
    6.10 + * Automatically generated by gen-html-entities. Do not edit by hand.
    6.11 + */
    6.12 +
    6.13 +struct {
    6.14 +    char *name;
    6.15 +    gunichar c;
    6.16 +} HTMLentities[] = {
    6.17 +EOF
    6.18 +}
    6.19 +
    6.20 +parse_ent_file()	# $1: .ent file; prints one { "name", codepoint }, row per CDATA entity
    6.21 +{
    6.22 +    awk '/<!ENTITY .* CDATA/ { \
    6.23 +      c=substr($4,4,length($4)-5);
    6.24 +      print "    { \"" $2 "\", " c, "}," } \
    6.25 +      ' "$1"
    6.26 +}
    6.27 +
    6.28 +footer()	# write the closing brace of the generated C table to stdout
    6.29 +{
    6.30 +cat << EOF
    6.31 +};
    6.32 +EOF
    6.33 +}
    6.34 +
    6.35 +header	# output: table opening, one row per entity of every input file, table closing
    6.36 +for file in "$@"; do	# explicit list: "for file; do" is rejected by some Bourne shells
    6.37 +    parse_ent_file "$file"
    6.38 +done
    6.39 +footer
     7.1 --- a/configure.ac	Thu May 30 07:31:24 2013 +0100
     7.2 +++ b/configure.ac	Thu May 30 17:16:37 2013 +0100
     7.3 @@ -13,7 +13,7 @@
     7.4  test/compatibility/Makefile
     7.5  doc/Makefile
     7.6  ])
     7.7 -AM_INIT_AUTOMAKE(no-define,1.11)
     7.8 +AM_INIT_AUTOMAKE([no-define 1.11])
     7.9  AM_SILENT_RULES([yes])
    7.10  AC_CANONICAL_HOST
    7.11