1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/bookloupe/HTMLlat1.ent Thu May 30 17:16:37 2013 +0100
1.3 @@ -0,0 +1,194 @@
1.4 +<!-- Portions © International Organization for Standardization 1986
1.5 + Permission to copy in any form is granted for use with
1.6 + conforming SGML systems and applications as defined in
1.7 + ISO 8879, provided this notice is included in all copies.
1.8 +-->
1.9 +<!-- Character entity set. Typical invocation:
1.10 + <!ENTITY % HTMLlat1 PUBLIC
1.11 + "-//W3C//ENTITIES Latin 1//EN//HTML">
1.12 + %HTMLlat1;
1.13 +-->
1.14 +
1.15 +<!ENTITY nbsp CDATA " " -- no-break space = non-breaking space,
1.16 + U+00A0 ISOnum -->
1.17 +<!ENTITY iexcl CDATA "¡" -- inverted exclamation mark, U+00A1 ISOnum -->
1.18 +<!ENTITY cent CDATA "¢" -- cent sign, U+00A2 ISOnum -->
1.19 +<!ENTITY pound CDATA "£" -- pound sign, U+00A3 ISOnum -->
1.20 +<!ENTITY curren CDATA "¤" -- currency sign, U+00A4 ISOnum -->
1.21 +<!ENTITY yen CDATA "¥" -- yen sign = yuan sign, U+00A5 ISOnum -->
1.22 +<!ENTITY brvbar CDATA "¦" -- broken bar = broken vertical bar,
1.23 + U+00A6 ISOnum -->
1.24 +<!ENTITY sect CDATA "§" -- section sign, U+00A7 ISOnum -->
1.25 +<!ENTITY uml CDATA "¨" -- diaeresis = spacing diaeresis,
1.26 + U+00A8 ISOdia -->
1.27 +<!ENTITY copy CDATA "©" -- copyright sign, U+00A9 ISOnum -->
1.28 +<!ENTITY ordf CDATA "ª" -- feminine ordinal indicator, U+00AA ISOnum -->
1.29 +<!ENTITY laquo CDATA "«" -- left-pointing double angle quotation mark
1.30 + = left pointing guillemet, U+00AB ISOnum -->
1.31 +<!ENTITY not CDATA "¬" -- not sign, U+00AC ISOnum -->
1.32 +<!ENTITY shy CDATA "­" -- soft hyphen = discretionary hyphen,
1.33 + U+00AD ISOnum -->
1.34 +<!ENTITY reg CDATA "®" -- registered sign = registered trade mark sign,
1.35 + U+00AE ISOnum -->
1.36 +<!ENTITY macr CDATA "¯" -- macron = spacing macron = overline
1.37 + = APL overbar, U+00AF ISOdia -->
1.38 +<!ENTITY deg CDATA "°" -- degree sign, U+00B0 ISOnum -->
1.39 +<!ENTITY plusmn CDATA "±" -- plus-minus sign = plus-or-minus sign,
1.40 + U+00B1 ISOnum -->
1.41 +<!ENTITY sup2 CDATA "²" -- superscript two = superscript digit two
1.42 + = squared, U+00B2 ISOnum -->
1.43 +<!ENTITY sup3 CDATA "³" -- superscript three = superscript digit three
1.44 + = cubed, U+00B3 ISOnum -->
1.45 +<!ENTITY acute CDATA "´" -- acute accent = spacing acute,
1.46 + U+00B4 ISOdia -->
1.47 +<!ENTITY micro CDATA "µ" -- micro sign, U+00B5 ISOnum -->
1.48 +<!ENTITY para CDATA "¶" -- pilcrow sign = paragraph sign,
1.49 + U+00B6 ISOnum -->
1.50 +<!ENTITY middot CDATA "·" -- middle dot = Georgian comma
1.51 + = Greek middle dot, U+00B7 ISOnum -->
1.52 +<!ENTITY cedil CDATA "¸" -- cedilla = spacing cedilla, U+00B8 ISOdia -->
1.53 +<!ENTITY sup1 CDATA "¹" -- superscript one = superscript digit one,
1.54 + U+00B9 ISOnum -->
1.55 +<!ENTITY ordm CDATA "º" -- masculine ordinal indicator,
1.56 + U+00BA ISOnum -->
1.57 +<!ENTITY raquo CDATA "»" -- right-pointing double angle quotation mark
1.58 + = right pointing guillemet, U+00BB ISOnum -->
1.59 +<!ENTITY frac14 CDATA "¼" -- vulgar fraction one quarter
1.60 + = fraction one quarter, U+00BC ISOnum -->
1.61 +<!ENTITY frac12 CDATA "½" -- vulgar fraction one half
1.62 + = fraction one half, U+00BD ISOnum -->
1.63 +<!ENTITY frac34 CDATA "¾" -- vulgar fraction three quarters
1.64 + = fraction three quarters, U+00BE ISOnum -->
1.65 +<!ENTITY iquest CDATA "¿" -- inverted question mark
1.66 + = turned question mark, U+00BF ISOnum -->
1.67 +<!ENTITY Agrave CDATA "À" -- latin capital letter A with grave
1.68 + = latin capital letter A grave,
1.69 + U+00C0 ISOlat1 -->
1.70 +<!ENTITY Aacute CDATA "Á" -- latin capital letter A with acute,
1.71 + U+00C1 ISOlat1 -->
1.72 +<!ENTITY Acirc CDATA "Â" -- latin capital letter A with circumflex,
1.73 + U+00C2 ISOlat1 -->
1.74 +<!ENTITY Atilde CDATA "Ã" -- latin capital letter A with tilde,
1.75 + U+00C3 ISOlat1 -->
1.76 +<!ENTITY Auml CDATA "Ä" -- latin capital letter A with diaeresis,
1.77 + U+00C4 ISOlat1 -->
1.78 +<!ENTITY Aring CDATA "Å" -- latin capital letter A with ring above
1.79 + = latin capital letter A ring,
1.80 + U+00C5 ISOlat1 -->
1.81 +<!ENTITY AElig CDATA "Æ" -- latin capital letter AE
1.82 + = latin capital ligature AE,
1.83 + U+00C6 ISOlat1 -->
1.84 +<!ENTITY Ccedil CDATA "Ç" -- latin capital letter C with cedilla,
1.85 + U+00C7 ISOlat1 -->
1.86 +<!ENTITY Egrave CDATA "È" -- latin capital letter E with grave,
1.87 + U+00C8 ISOlat1 -->
1.88 +<!ENTITY Eacute CDATA "É" -- latin capital letter E with acute,
1.89 + U+00C9 ISOlat1 -->
1.90 +<!ENTITY Ecirc CDATA "Ê" -- latin capital letter E with circumflex,
1.91 + U+00CA ISOlat1 -->
1.92 +<!ENTITY Euml CDATA "Ë" -- latin capital letter E with diaeresis,
1.93 + U+00CB ISOlat1 -->
1.94 +<!ENTITY Igrave CDATA "Ì" -- latin capital letter I with grave,
1.95 + U+00CC ISOlat1 -->
1.96 +<!ENTITY Iacute CDATA "Í" -- latin capital letter I with acute,
1.97 + U+00CD ISOlat1 -->
1.98 +<!ENTITY Icirc CDATA "Î" -- latin capital letter I with circumflex,
1.99 + U+00CE ISOlat1 -->
1.100 +<!ENTITY Iuml CDATA "Ï" -- latin capital letter I with diaeresis,
1.101 + U+00CF ISOlat1 -->
1.102 +<!ENTITY ETH CDATA "Ð" -- latin capital letter ETH, U+00D0 ISOlat1 -->
1.103 +<!ENTITY Ntilde CDATA "Ñ" -- latin capital letter N with tilde,
1.104 + U+00D1 ISOlat1 -->
1.105 +<!ENTITY Ograve CDATA "Ò" -- latin capital letter O with grave,
1.106 + U+00D2 ISOlat1 -->
1.107 +<!ENTITY Oacute CDATA "Ó" -- latin capital letter O with acute,
1.108 + U+00D3 ISOlat1 -->
1.109 +<!ENTITY Ocirc CDATA "Ô" -- latin capital letter O with circumflex,
1.110 + U+00D4 ISOlat1 -->
1.111 +<!ENTITY Otilde CDATA "Õ" -- latin capital letter O with tilde,
1.112 + U+00D5 ISOlat1 -->
1.113 +<!ENTITY Ouml CDATA "Ö" -- latin capital letter O with diaeresis,
1.114 + U+00D6 ISOlat1 -->
1.115 +<!ENTITY times CDATA "×" -- multiplication sign, U+00D7 ISOnum -->
1.116 +<!ENTITY Oslash CDATA "Ø" -- latin capital letter O with stroke
1.117 + = latin capital letter O slash,
1.118 + U+00D8 ISOlat1 -->
1.119 +<!ENTITY Ugrave CDATA "Ù" -- latin capital letter U with grave,
1.120 + U+00D9 ISOlat1 -->
1.121 +<!ENTITY Uacute CDATA "Ú" -- latin capital letter U with acute,
1.122 + U+00DA ISOlat1 -->
1.123 +<!ENTITY Ucirc CDATA "Û" -- latin capital letter U with circumflex,
1.124 + U+00DB ISOlat1 -->
1.125 +<!ENTITY Uuml CDATA "Ü" -- latin capital letter U with diaeresis,
1.126 + U+00DC ISOlat1 -->
1.127 +<!ENTITY Yacute CDATA "Ý" -- latin capital letter Y with acute,
1.128 + U+00DD ISOlat1 -->
1.129 +<!ENTITY THORN CDATA "Þ" -- latin capital letter THORN,
1.130 + U+00DE ISOlat1 -->
1.131 +<!ENTITY szlig CDATA "ß" -- latin small letter sharp s = ess-zed,
1.132 + U+00DF ISOlat1 -->
1.133 +<!ENTITY agrave CDATA "à" -- latin small letter a with grave
1.134 + = latin small letter a grave,
1.135 + U+00E0 ISOlat1 -->
1.136 +<!ENTITY aacute CDATA "á" -- latin small letter a with acute,
1.137 + U+00E1 ISOlat1 -->
1.138 +<!ENTITY acirc CDATA "â" -- latin small letter a with circumflex,
1.139 + U+00E2 ISOlat1 -->
1.140 +<!ENTITY atilde CDATA "ã" -- latin small letter a with tilde,
1.141 + U+00E3 ISOlat1 -->
1.142 +<!ENTITY auml CDATA "ä" -- latin small letter a with diaeresis,
1.143 + U+00E4 ISOlat1 -->
1.144 +<!ENTITY aring CDATA "å" -- latin small letter a with ring above
1.145 + = latin small letter a ring,
1.146 + U+00E5 ISOlat1 -->
1.147 +<!ENTITY aelig CDATA "æ" -- latin small letter ae
1.148 + = latin small ligature ae, U+00E6 ISOlat1 -->
1.149 +<!ENTITY ccedil CDATA "ç" -- latin small letter c with cedilla,
1.150 + U+00E7 ISOlat1 -->
1.151 +<!ENTITY egrave CDATA "è" -- latin small letter e with grave,
1.152 + U+00E8 ISOlat1 -->
1.153 +<!ENTITY eacute CDATA "é" -- latin small letter e with acute,
1.154 + U+00E9 ISOlat1 -->
1.155 +<!ENTITY ecirc CDATA "ê" -- latin small letter e with circumflex,
1.156 + U+00EA ISOlat1 -->
1.157 +<!ENTITY euml CDATA "ë" -- latin small letter e with diaeresis,
1.158 + U+00EB ISOlat1 -->
1.159 +<!ENTITY igrave CDATA "ì" -- latin small letter i with grave,
1.160 + U+00EC ISOlat1 -->
1.161 +<!ENTITY iacute CDATA "í" -- latin small letter i with acute,
1.162 + U+00ED ISOlat1 -->
1.163 +<!ENTITY icirc CDATA "î" -- latin small letter i with circumflex,
1.164 + U+00EE ISOlat1 -->
1.165 +<!ENTITY iuml CDATA "ï" -- latin small letter i with diaeresis,
1.166 + U+00EF ISOlat1 -->
1.167 +<!ENTITY eth CDATA "ð" -- latin small letter eth, U+00F0 ISOlat1 -->
1.168 +<!ENTITY ntilde CDATA "ñ" -- latin small letter n with tilde,
1.169 + U+00F1 ISOlat1 -->
1.170 +<!ENTITY ograve CDATA "ò" -- latin small letter o with grave,
1.171 + U+00F2 ISOlat1 -->
1.172 +<!ENTITY oacute CDATA "ó" -- latin small letter o with acute,
1.173 + U+00F3 ISOlat1 -->
1.174 +<!ENTITY ocirc CDATA "ô" -- latin small letter o with circumflex,
1.175 + U+00F4 ISOlat1 -->
1.176 +<!ENTITY otilde CDATA "õ" -- latin small letter o with tilde,
1.177 + U+00F5 ISOlat1 -->
1.178 +<!ENTITY ouml CDATA "ö" -- latin small letter o with diaeresis,
1.179 + U+00F6 ISOlat1 -->
1.180 +<!ENTITY divide CDATA "÷" -- division sign, U+00F7 ISOnum -->
1.181 +<!ENTITY oslash CDATA "ø" -- latin small letter o with stroke
1.182 + = latin small letter o slash,
1.183 + U+00F8 ISOlat1 -->
1.184 +<!ENTITY ugrave CDATA "ù" -- latin small letter u with grave,
1.185 + U+00F9 ISOlat1 -->
1.186 +<!ENTITY uacute CDATA "ú" -- latin small letter u with acute,
1.187 + U+00FA ISOlat1 -->
1.188 +<!ENTITY ucirc CDATA "û" -- latin small letter u with circumflex,
1.189 + U+00FB ISOlat1 -->
1.190 +<!ENTITY uuml CDATA "ü" -- latin small letter u with diaeresis,
1.191 + U+00FC ISOlat1 -->
1.192 +<!ENTITY yacute CDATA "ý" -- latin small letter y with acute,
1.193 + U+00FD ISOlat1 -->
1.194 +<!ENTITY thorn CDATA "þ" -- latin small letter thorn,
1.195 + U+00FE ISOlat1 -->
1.196 +<!ENTITY yuml CDATA "ÿ" -- latin small letter y with diaeresis,
1.197 + U+00FF ISOlat1 -->
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/bookloupe/HTMLspecial.ent Thu May 30 17:16:37 2013 +0100
2.3 @@ -0,0 +1,77 @@
2.4 +<!-- Special characters for HTML -->
2.5 +
2.6 +<!-- Character entity set. Typical invocation:
2.7 + <!ENTITY % HTMLspecial PUBLIC
2.8 + "-//W3C//ENTITIES Special//EN//HTML">
2.9 + %HTMLspecial; -->
2.10 +
2.11 +<!-- Portions © International Organization for Standardization 1986:
2.12 + Permission to copy in any form is granted for use with
2.13 + conforming SGML systems and applications as defined in
2.14 + ISO 8879, provided this notice is included in all copies.
2.15 +-->
2.16 +
2.17 +<!-- Relevant ISO entity set is given unless names are newly introduced.
2.18 + New names (i.e., not in ISO 8879 list) do not clash with any
2.19 + existing ISO 8879 entity names. ISO 10646 character numbers
2.20 + are given for each character, in hex. CDATA values are decimal
2.21 + conversions of the ISO 10646 values and refer to the document
2.22 + character set. Names are ISO 10646 names.
2.23 +
2.24 +-->
2.25 +
2.26 +<!-- C0 Controls and Basic Latin -->
2.27 +<!ENTITY quot CDATA "&#34;" -- quotation mark = APL quote,
2.28 + U+0022 ISOnum -->
2.29 +<!ENTITY amp CDATA "&#38;" -- ampersand, U+0026 ISOnum -->
2.30 +<!ENTITY lt CDATA "&#60;" -- less-than sign, U+003C ISOnum -->
2.31 +<!ENTITY gt CDATA "&#62;" -- greater-than sign, U+003E ISOnum -->
2.32 +
2.33 +<!-- Latin Extended-A -->
2.34 +<!ENTITY OElig CDATA "Œ" -- latin capital ligature OE,
2.35 + U+0152 ISOlat2 -->
2.36 +<!ENTITY oelig CDATA "œ" -- latin small ligature oe, U+0153 ISOlat2 -->
2.37 +<!-- ligature is a misnomer, this is a separate character in some languages -->
2.38 +<!ENTITY Scaron CDATA "Š" -- latin capital letter S with caron,
2.39 + U+0160 ISOlat2 -->
2.40 +<!ENTITY scaron CDATA "š" -- latin small letter s with caron,
2.41 + U+0161 ISOlat2 -->
2.42 +<!ENTITY Yuml CDATA "Ÿ" -- latin capital letter Y with diaeresis,
2.43 + U+0178 ISOlat2 -->
2.44 +
2.45 +<!-- Spacing Modifier Letters -->
2.46 +<!ENTITY circ CDATA "ˆ" -- modifier letter circumflex accent,
2.47 + U+02C6 ISOpub -->
2.48 +<!ENTITY tilde CDATA "˜" -- small tilde, U+02DC ISOdia -->
2.49 +
2.50 +<!-- General Punctuation -->
2.51 +<!ENTITY ensp CDATA " " -- en space, U+2002 ISOpub -->
2.52 +<!ENTITY emsp CDATA " " -- em space, U+2003 ISOpub -->
2.53 +<!ENTITY thinsp CDATA " " -- thin space, U+2009 ISOpub -->
2.54 +<!ENTITY zwnj CDATA "‌" -- zero width non-joiner,
2.55 + U+200C NEW RFC 2070 -->
2.56 +<!ENTITY zwj CDATA "‍" -- zero width joiner, U+200D NEW RFC 2070 -->
2.57 +<!ENTITY lrm CDATA "‎" -- left-to-right mark, U+200E NEW RFC 2070 -->
2.58 +<!ENTITY rlm CDATA "‏" -- right-to-left mark, U+200F NEW RFC 2070 -->
2.59 +<!ENTITY ndash CDATA "–" -- en dash, U+2013 ISOpub -->
2.60 +<!ENTITY mdash CDATA "—" -- em dash, U+2014 ISOpub -->
2.61 +<!ENTITY lsquo CDATA "‘" -- left single quotation mark,
2.62 + U+2018 ISOnum -->
2.63 +<!ENTITY rsquo CDATA "’" -- right single quotation mark,
2.64 + U+2019 ISOnum -->
2.65 +<!ENTITY sbquo CDATA "‚" -- single low-9 quotation mark, U+201A NEW -->
2.66 +<!ENTITY ldquo CDATA "“" -- left double quotation mark,
2.67 + U+201C ISOnum -->
2.68 +<!ENTITY rdquo CDATA "”" -- right double quotation mark,
2.69 + U+201D ISOnum -->
2.70 +<!ENTITY bdquo CDATA "„" -- double low-9 quotation mark, U+201E NEW -->
2.71 +<!ENTITY dagger CDATA "†" -- dagger, U+2020 ISOpub -->
2.72 +<!ENTITY Dagger CDATA "‡" -- double dagger, U+2021 ISOpub -->
2.73 +<!ENTITY permil CDATA "‰" -- per mille sign, U+2030 ISOtech -->
2.74 +<!ENTITY lsaquo CDATA "‹" -- single left-pointing angle quotation mark,
2.75 + U+2039 ISO proposed -->
2.76 +<!-- lsaquo is proposed but not yet ISO standardized -->
2.77 +<!ENTITY rsaquo CDATA "›" -- single right-pointing angle quotation mark,
2.78 + U+203A ISO proposed -->
2.79 +<!-- rsaquo is proposed but not yet ISO standardized -->
2.80 +<!ENTITY euro CDATA "€" -- euro sign, U+20AC NEW -->
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
3.2 +++ b/bookloupe/HTMLsymbol.ent Thu May 30 17:16:37 2013 +0100
3.3 @@ -0,0 +1,241 @@
3.4 +<!-- Mathematical, Greek and Symbolic characters for HTML -->
3.5 +
3.6 +<!-- Character entity set. Typical invocation:
3.7 + <!ENTITY % HTMLsymbol PUBLIC
3.8 + "-//W3C//ENTITIES Symbols//EN//HTML">
3.9 + %HTMLsymbol; -->
3.10 +
3.11 +<!-- Portions © International Organization for Standardization 1986:
3.12 + Permission to copy in any form is granted for use with
3.13 + conforming SGML systems and applications as defined in
3.14 + ISO 8879, provided this notice is included in all copies.
3.15 +-->
3.16 +
3.17 +<!-- Relevant ISO entity set is given unless names are newly introduced.
3.18 + New names (i.e., not in ISO 8879 list) do not clash with any
3.19 + existing ISO 8879 entity names. ISO 10646 character numbers
3.20 + are given for each character, in hex. CDATA values are decimal
3.21 + conversions of the ISO 10646 values and refer to the document
3.22 + character set. Names are ISO 10646 names.
3.23 +
3.24 +-->
3.25 +
3.26 +<!-- Latin Extended-B -->
3.27 +<!ENTITY fnof CDATA "ƒ" -- latin small f with hook = function
3.28 + = florin, U+0192 ISOtech -->
3.29 +
3.30 +<!-- Greek -->
3.31 +<!ENTITY Alpha CDATA "Α" -- greek capital letter alpha, U+0391 -->
3.32 +<!ENTITY Beta CDATA "Β" -- greek capital letter beta, U+0392 -->
3.33 +<!ENTITY Gamma CDATA "Γ" -- greek capital letter gamma,
3.34 + U+0393 ISOgrk3 -->
3.35 +<!ENTITY Delta CDATA "Δ" -- greek capital letter delta,
3.36 + U+0394 ISOgrk3 -->
3.37 +<!ENTITY Epsilon CDATA "Ε" -- greek capital letter epsilon, U+0395 -->
3.38 +<!ENTITY Zeta CDATA "Ζ" -- greek capital letter zeta, U+0396 -->
3.39 +<!ENTITY Eta CDATA "Η" -- greek capital letter eta, U+0397 -->
3.40 +<!ENTITY Theta CDATA "Θ" -- greek capital letter theta,
3.41 + U+0398 ISOgrk3 -->
3.42 +<!ENTITY Iota CDATA "Ι" -- greek capital letter iota, U+0399 -->
3.43 +<!ENTITY Kappa CDATA "Κ" -- greek capital letter kappa, U+039A -->
3.44 +<!ENTITY Lambda CDATA "Λ" -- greek capital letter lambda,
3.45 + U+039B ISOgrk3 -->
3.46 +<!ENTITY Mu CDATA "Μ" -- greek capital letter mu, U+039C -->
3.47 +<!ENTITY Nu CDATA "Ν" -- greek capital letter nu, U+039D -->
3.48 +<!ENTITY Xi CDATA "Ξ" -- greek capital letter xi, U+039E ISOgrk3 -->
3.49 +<!ENTITY Omicron CDATA "Ο" -- greek capital letter omicron, U+039F -->
3.50 +<!ENTITY Pi CDATA "Π" -- greek capital letter pi, U+03A0 ISOgrk3 -->
3.51 +<!ENTITY Rho CDATA "Ρ" -- greek capital letter rho, U+03A1 -->
3.52 +<!-- there is no Sigmaf, and no U+03A2 character either -->
3.53 +<!ENTITY Sigma CDATA "Σ" -- greek capital letter sigma,
3.54 + U+03A3 ISOgrk3 -->
3.55 +<!ENTITY Tau CDATA "Τ" -- greek capital letter tau, U+03A4 -->
3.56 +<!ENTITY Upsilon CDATA "Υ" -- greek capital letter upsilon,
3.57 + U+03A5 ISOgrk3 -->
3.58 +<!ENTITY Phi CDATA "Φ" -- greek capital letter phi,
3.59 + U+03A6 ISOgrk3 -->
3.60 +<!ENTITY Chi CDATA "Χ" -- greek capital letter chi, U+03A7 -->
3.61 +<!ENTITY Psi CDATA "Ψ" -- greek capital letter psi,
3.62 + U+03A8 ISOgrk3 -->
3.63 +<!ENTITY Omega CDATA "Ω" -- greek capital letter omega,
3.64 + U+03A9 ISOgrk3 -->
3.65 +
3.66 +<!ENTITY alpha CDATA "α" -- greek small letter alpha,
3.67 + U+03B1 ISOgrk3 -->
3.68 +<!ENTITY beta CDATA "β" -- greek small letter beta, U+03B2 ISOgrk3 -->
3.69 +<!ENTITY gamma CDATA "γ" -- greek small letter gamma,
3.70 + U+03B3 ISOgrk3 -->
3.71 +<!ENTITY delta CDATA "δ" -- greek small letter delta,
3.72 + U+03B4 ISOgrk3 -->
3.73 +<!ENTITY epsilon CDATA "ε" -- greek small letter epsilon,
3.74 + U+03B5 ISOgrk3 -->
3.75 +<!ENTITY zeta CDATA "ζ" -- greek small letter zeta, U+03B6 ISOgrk3 -->
3.76 +<!ENTITY eta CDATA "η" -- greek small letter eta, U+03B7 ISOgrk3 -->
3.77 +<!ENTITY theta CDATA "θ" -- greek small letter theta,
3.78 + U+03B8 ISOgrk3 -->
3.79 +<!ENTITY iota CDATA "ι" -- greek small letter iota, U+03B9 ISOgrk3 -->
3.80 +<!ENTITY kappa CDATA "κ" -- greek small letter kappa,
3.81 + U+03BA ISOgrk3 -->
3.82 +<!ENTITY lambda CDATA "λ" -- greek small letter lambda,
3.83 + U+03BB ISOgrk3 -->
3.84 +<!ENTITY mu CDATA "μ" -- greek small letter mu, U+03BC ISOgrk3 -->
3.85 +<!ENTITY nu CDATA "ν" -- greek small letter nu, U+03BD ISOgrk3 -->
3.86 +<!ENTITY xi CDATA "ξ" -- greek small letter xi, U+03BE ISOgrk3 -->
3.87 +<!ENTITY omicron CDATA "ο" -- greek small letter omicron, U+03BF NEW -->
3.88 +<!ENTITY pi CDATA "π" -- greek small letter pi, U+03C0 ISOgrk3 -->
3.89 +<!ENTITY rho CDATA "ρ" -- greek small letter rho, U+03C1 ISOgrk3 -->
3.90 +<!ENTITY sigmaf CDATA "ς" -- greek small letter final sigma,
3.91 + U+03C2 ISOgrk3 -->
3.92 +<!ENTITY sigma CDATA "σ" -- greek small letter sigma,
3.93 + U+03C3 ISOgrk3 -->
3.94 +<!ENTITY tau CDATA "τ" -- greek small letter tau, U+03C4 ISOgrk3 -->
3.95 +<!ENTITY upsilon CDATA "υ" -- greek small letter upsilon,
3.96 + U+03C5 ISOgrk3 -->
3.97 +<!ENTITY phi CDATA "φ" -- greek small letter phi, U+03C6 ISOgrk3 -->
3.98 +<!ENTITY chi CDATA "χ" -- greek small letter chi, U+03C7 ISOgrk3 -->
3.99 +<!ENTITY psi CDATA "ψ" -- greek small letter psi, U+03C8 ISOgrk3 -->
3.100 +<!ENTITY omega CDATA "ω" -- greek small letter omega,
3.101 + U+03C9 ISOgrk3 -->
3.102 +<!ENTITY thetasym CDATA "ϑ" -- greek small letter theta symbol,
3.103 + U+03D1 NEW -->
3.104 +<!ENTITY upsih CDATA "ϒ" -- greek upsilon with hook symbol,
3.105 + U+03D2 NEW -->
3.106 +<!ENTITY piv CDATA "ϖ" -- greek pi symbol, U+03D6 ISOgrk3 -->
3.107 +
3.108 +<!-- General Punctuation -->
3.109 +<!ENTITY bull CDATA "•" -- bullet = black small circle,
3.110 + U+2022 ISOpub -->
3.111 +<!-- bullet is NOT the same as bullet operator, U+2219 -->
3.112 +<!ENTITY hellip CDATA "…" -- horizontal ellipsis = three dot leader,
3.113 + U+2026 ISOpub -->
3.114 +<!ENTITY prime CDATA "′" -- prime = minutes = feet, U+2032 ISOtech -->
3.115 +<!ENTITY Prime CDATA "″" -- double prime = seconds = inches,
3.116 + U+2033 ISOtech -->
3.117 +<!ENTITY oline CDATA "‾" -- overline = spacing overscore,
3.118 + U+203E NEW -->
3.119 +<!ENTITY frasl CDATA "⁄" -- fraction slash, U+2044 NEW -->
3.120 +
3.121 +<!-- Letterlike Symbols -->
3.122 +<!ENTITY weierp CDATA "℘" -- script capital P = power set
3.123 + = Weierstrass p, U+2118 ISOamso -->
3.124 +<!ENTITY image CDATA "ℑ" -- blackletter capital I = imaginary part,
3.125 + U+2111 ISOamso -->
3.126 +<!ENTITY real CDATA "ℜ" -- blackletter capital R = real part symbol,
3.127 + U+211C ISOamso -->
3.128 +<!ENTITY trade CDATA "™" -- trade mark sign, U+2122 ISOnum -->
3.129 +<!ENTITY alefsym CDATA "ℵ" -- alef symbol = first transfinite cardinal,
3.130 + U+2135 NEW -->
3.131 +<!-- alef symbol is NOT the same as hebrew letter alef,
3.132 + U+05D0 although the same glyph could be used to depict both characters -->
3.133 +
3.134 +<!-- Arrows -->
3.135 +<!ENTITY larr CDATA "←" -- leftwards arrow, U+2190 ISOnum -->
3.136 +<!ENTITY uarr CDATA "↑" -- upwards arrow, U+2191 ISOnum -->
3.137 +<!ENTITY rarr CDATA "→" -- rightwards arrow, U+2192 ISOnum -->
3.138 +<!ENTITY darr CDATA "↓" -- downwards arrow, U+2193 ISOnum -->
3.139 +<!ENTITY harr CDATA "↔" -- left right arrow, U+2194 ISOamsa -->
3.140 +<!ENTITY crarr CDATA "↵" -- downwards arrow with corner leftwards
3.141 + = carriage return, U+21B5 NEW -->
3.142 +<!ENTITY lArr CDATA "⇐" -- leftwards double arrow, U+21D0 ISOtech -->
3.143 +<!-- ISO 10646 does not say that lArr is the same as the 'is implied by' arrow
3.144 + but also does not have any other character for that function. So ? lArr can
3.145 + be used for 'is implied by' as ISOtech suggests -->
3.146 +<!ENTITY uArr CDATA "⇑" -- upwards double arrow, U+21D1 ISOamsa -->
3.147 +<!ENTITY rArr CDATA "⇒" -- rightwards double arrow,
3.148 + U+21D2 ISOtech -->
3.149 +<!-- ISO 10646 does not say this is the 'implies' character but does not have
3.150 + another character with this function so ?
3.151 + rArr can be used for 'implies' as ISOtech suggests -->
3.152 +<!ENTITY dArr CDATA "⇓" -- downwards double arrow, U+21D3 ISOamsa -->
3.153 +<!ENTITY hArr CDATA "⇔" -- left right double arrow,
3.154 + U+21D4 ISOamsa -->
3.155 +
3.156 +<!-- Mathematical Operators -->
3.157 +<!ENTITY forall CDATA "∀" -- for all, U+2200 ISOtech -->
3.158 +<!ENTITY part CDATA "∂" -- partial differential, U+2202 ISOtech -->
3.159 +<!ENTITY exist CDATA "∃" -- there exists, U+2203 ISOtech -->
3.160 +<!ENTITY empty CDATA "∅" -- empty set = null set = diameter,
3.161 + U+2205 ISOamso -->
3.162 +<!ENTITY nabla CDATA "∇" -- nabla = backward difference,
3.163 + U+2207 ISOtech -->
3.164 +<!ENTITY isin CDATA "∈" -- element of, U+2208 ISOtech -->
3.165 +<!ENTITY notin CDATA "∉" -- not an element of, U+2209 ISOtech -->
3.166 +<!ENTITY ni CDATA "∋" -- contains as member, U+220B ISOtech -->
3.167 +<!-- should there be a more memorable name than 'ni'? -->
3.168 +<!ENTITY prod CDATA "∏" -- n-ary product = product sign,
3.169 + U+220F ISOamsb -->
3.170 +<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though
3.171 + the same glyph might be used for both -->
3.172 +<!ENTITY sum CDATA "∑" -- n-ary summation, U+2211 ISOamsb -->
3.173 +<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
3.174 + though the same glyph might be used for both -->
3.175 +<!ENTITY minus CDATA "−" -- minus sign, U+2212 ISOtech -->
3.176 +<!ENTITY lowast CDATA "∗" -- asterisk operator, U+2217 ISOtech -->
3.177 +<!ENTITY radic CDATA "√" -- square root = radical sign,
3.178 + U+221A ISOtech -->
3.179 +<!ENTITY prop CDATA "∝" -- proportional to, U+221D ISOtech -->
3.180 +<!ENTITY infin CDATA "∞" -- infinity, U+221E ISOtech -->
3.181 +<!ENTITY ang CDATA "∠" -- angle, U+2220 ISOamso -->
3.182 +<!ENTITY and CDATA "∧" -- logical and = wedge, U+2227 ISOtech -->
3.183 +<!ENTITY or CDATA "∨" -- logical or = vee, U+2228 ISOtech -->
3.184 +<!ENTITY cap CDATA "∩" -- intersection = cap, U+2229 ISOtech -->
3.185 +<!ENTITY cup CDATA "∪" -- union = cup, U+222A ISOtech -->
3.186 +<!ENTITY int CDATA "∫" -- integral, U+222B ISOtech -->
3.187 +<!ENTITY there4 CDATA "∴" -- therefore, U+2234 ISOtech -->
3.188 +<!ENTITY sim CDATA "∼" -- tilde operator = varies with = similar to,
3.189 + U+223C ISOtech -->
3.190 +<!-- tilde operator is NOT the same character as the tilde, U+007E,
3.191 + although the same glyph might be used to represent both -->
3.192 +<!ENTITY cong CDATA "≅" -- approximately equal to, U+2245 ISOtech -->
3.193 +<!ENTITY asymp CDATA "≈" -- almost equal to = asymptotic to,
3.194 + U+2248 ISOamsr -->
3.195 +<!ENTITY ne CDATA "≠" -- not equal to, U+2260 ISOtech -->
3.196 +<!ENTITY equiv CDATA "≡" -- identical to, U+2261 ISOtech -->
3.197 +<!ENTITY le CDATA "≤" -- less-than or equal to, U+2264 ISOtech -->
3.198 +<!ENTITY ge CDATA "≥" -- greater-than or equal to,
3.199 + U+2265 ISOtech -->
3.200 +<!ENTITY sub CDATA "⊂" -- subset of, U+2282 ISOtech -->
3.201 +<!ENTITY sup CDATA "⊃" -- superset of, U+2283 ISOtech -->
3.202 +<!-- note that nsup, 'not a superset of, U+2285' is not covered by the Symbol
3.203 + font encoding and is not included. Should it be, for symmetry?
3.204 + It is in ISOamsn -->
3.205 +<!ENTITY nsub CDATA "⊄" -- not a subset of, U+2284 ISOamsn -->
3.206 +<!ENTITY sube CDATA "⊆" -- subset of or equal to, U+2286 ISOtech -->
3.207 +<!ENTITY supe CDATA "⊇" -- superset of or equal to,
3.208 + U+2287 ISOtech -->
3.209 +<!ENTITY oplus CDATA "⊕" -- circled plus = direct sum,
3.210 + U+2295 ISOamsb -->
3.211 +<!ENTITY otimes CDATA "⊗" -- circled times = vector product,
3.212 + U+2297 ISOamsb -->
3.213 +<!ENTITY perp CDATA "⊥" -- up tack = orthogonal to = perpendicular,
3.214 + U+22A5 ISOtech -->
3.215 +<!ENTITY sdot CDATA "⋅" -- dot operator, U+22C5 ISOamsb -->
3.216 +<!-- dot operator is NOT the same character as U+00B7 middle dot -->
3.217 +
3.218 +<!-- Miscellaneous Technical -->
3.219 +<!ENTITY lceil CDATA "⌈" -- left ceiling = apl upstile,
3.220 + U+2308 ISOamsc -->
3.221 +<!ENTITY rceil CDATA "⌉" -- right ceiling, U+2309 ISOamsc -->
3.222 +<!ENTITY lfloor CDATA "⌊" -- left floor = apl downstile,
3.223 + U+230A ISOamsc -->
3.224 +<!ENTITY rfloor CDATA "⌋" -- right floor, U+230B ISOamsc -->
3.225 +<!ENTITY lang CDATA "〈" -- left-pointing angle bracket = bra,
3.226 + U+2329 ISOtech -->
3.227 +<!-- lang is NOT the same character as U+003C 'less than'
3.228 + or U+2039 'single left-pointing angle quotation mark' -->
3.229 +<!ENTITY rang CDATA "〉" -- right-pointing angle bracket = ket,
3.230 + U+232A ISOtech -->
3.231 +<!-- rang is NOT the same character as U+003E 'greater than'
3.232 + or U+203A 'single right-pointing angle quotation mark' -->
3.233 +
3.234 +<!-- Geometric Shapes -->
3.235 +<!ENTITY loz CDATA "◊" -- lozenge, U+25CA ISOpub -->
3.236 +
3.237 +<!-- Miscellaneous Symbols -->
3.238 +<!ENTITY spades CDATA "♠" -- black spade suit, U+2660 ISOpub -->
3.239 +<!-- black here seems to mean filled as opposed to hollow -->
3.240 +<!ENTITY clubs CDATA "♣" -- black club suit = shamrock,
3.241 + U+2663 ISOpub -->
3.242 +<!ENTITY hearts CDATA "♥" -- black heart suit = valentine,
3.243 + U+2665 ISOpub -->
3.244 +<!ENTITY diams CDATA "♦" -- black diamond suit, U+2666 ISOpub -->
4.1 --- a/bookloupe/Makefile.am Thu May 30 07:31:24 2013 +0100
4.2 +++ b/bookloupe/Makefile.am Thu May 30 17:16:37 2013 +0100
4.3 @@ -8,5 +8,12 @@
4.4 bookloupe.typ: bookloupe.typ.in
4.5 sed 's/$$/\r/' $< > $@
4.6
4.7 -EXTRA_DIST=bookloupe.typ.in
4.8 -CLEANFILES=bookloupe.typ
4.9 +HTMLentities.h: gen-html-entities.sh HTMLlat1.ent HTMLsymbol.ent HTMLspecial.ent
4.10 + sh ${srcdir}/gen-html-entities.sh ${srcdir}/HTMLlat1.ent \
4.11 + ${srcdir}/HTMLsymbol.ent ${srcdir}/HTMLspecial.ent > $@
4.12 +
4.13 +bookloupe.$(OBJEXT): HTMLentities.h
4.14 +
4.15 +EXTRA_DIST=bookloupe.typ.in gen-html-entities.sh HTMLlat1.ent HTMLsymbol.ent \
4.16 + HTMLspecial.ent
4.17 +CLEANFILES=bookloupe.typ HTMLentities.h
5.1 --- a/bookloupe/bookloupe.c Thu May 30 07:31:24 2013 +0100
5.2 +++ b/bookloupe/bookloupe.c Thu May 30 17:16:37 2013 +0100
5.3 @@ -24,6 +24,7 @@
5.4 #include <ctype.h>
5.5 #include <glib.h>
5.6 #include <bl/bl.h>
5.7 +#include "HTMLentities.h"
5.8
5.9 gchar *prevline;
5.10
5.11 @@ -119,132 +120,6 @@
5.12 "among", "those", "into", "whom", "having", "thence", ""
5.13 };
5.14
5.15 -struct {
5.16 - char *htmlent;
5.17 - char *htmlnum;
5.18 - char *textent;
5.19 -} entities[] = {
5.20 - "&", "&", "&",
5.21 - "<", "<", "<",
5.22 - ">", ">", ">",
5.23 - "°", "°", " degrees",
5.24 - "£", "£", "L",
5.25 - """, """, "\"", /* quotation mark = APL quote */
5.26 - "Œ", "Œ", "OE", /* latin capital ligature OE */
5.27 - "œ", "œ", "oe", /* latin small ligature oe */
5.28 - "Š", "Š", "S", /* latin capital letter S with caron */
5.29 - "š", "š", "s", /* latin small letter s with caron */
5.30 - "Ÿ", "Ÿ", "Y", /* latin capital letter Y with diaeresis */
5.31 - "ˆ", "ˆ", "", /* modifier letter circumflex accent */
5.32 - "˜", "˜", "~", /* small tilde, U+02DC ISOdia */
5.33 - " ", " ", " ", /* en space, U+2002 ISOpub */
5.34 - " ", " ", " ", /* em space, U+2003 ISOpub */
5.35 - " ", " ", " ", /* thin space, U+2009 ISOpub */
5.36 - "–", "–", "-", /* en dash, U+2013 ISOpub */
5.37 - "—", "—", "--", /* em dash, U+2014 ISOpub */
5.38 - "’", "’", "'", /* right single quotation mark */
5.39 - "‚", "‚", "'", /* single low-9 quotation mark */
5.40 - "“", "“", "\"", /* left double quotation mark */
5.41 - "”", "”", "\"", /* right double quotation mark */
5.42 - "„", "„", "\"", /* double low-9 quotation mark */
5.43 - "‹", "‹", "\"", /* single left-pointing angle quotation mark */
5.44 - "›", "›", "\"", /* single right-pointing angle quotation mark */
5.45 - " ", " ", " ", /* no-break space = non-breaking space, */
5.46 - "¡", "¡", "!", /* inverted exclamation mark */
5.47 - "¢", "¢", "c", /* cent sign */
5.48 - "£", "£", "L", /* pound sign */
5.49 - "¤", "¤", "$", /* currency sign */
5.50 - "¥", "¥", "Y", /* yen sign = yuan sign */
5.51 - "§", "§", "--", /* section sign */
5.52 - "¨", "¨", " ", /* diaeresis = spacing diaeresis */
5.53 - "©", "©", "(C) ", /* copyright sign */
5.54 - "ª", "ª", " ", /* feminine ordinal indicator */
5.55 - "«", "«", "\"", /* left-pointing double angle quotation mark */
5.56 - "­", "­", "-", /* soft hyphen = discretionary hyphen */
5.57 - "®", "®", "(R) ", /* registered sign = registered trade mark sign */
5.58 - "¯", "¯", " ", /* macron = spacing macron = overline */
5.59 - "°", "°", " degrees", /* degree sign */
5.60 - "±", "±", "+-", /* plus-minus sign = plus-or-minus sign */
5.61 - "²", "²", "2", /* superscript two = superscript digit two */
5.62 - "³", "³", "3", /* superscript three = superscript digit three */
5.63 - "´", "´", " ", /* acute accent = spacing acute */
5.64 - "µ", "µ", "m", /* micro sign */
5.65 - "¶", "¶", "--", /* pilcrow sign = paragraph sign */
5.66 - "¸", "¸", " ", /* cedilla = spacing cedilla */
5.67 - "¹", "¹", "1", /* superscript one = superscript digit one */
5.68 - "º", "º", " ", /* masculine ordinal indicator */
5.69 - "»", "»", "\"", /* right-pointing double angle quotation mark */
5.70 - "¼", "¼", "1/4", /* vulgar fraction one quarter */
5.71 - "½", "½", "1/2", /* vulgar fraction one half */
5.72 - "¾", "¾", "3/4", /* vulgar fraction three quarters */
5.73 - "¿", "¿", "?", /* inverted question mark */
5.74 - "À", "À", "A", /* latin capital letter A with grave */
5.75 - "Á", "Á", "A", /* latin capital letter A with acute */
5.76 - "Â", "Â", "A", /* latin capital letter A with circumflex */
5.77 - "Ã", "Ã", "A", /* latin capital letter A with tilde */
5.78 - "Ä", "Ä", "A", /* latin capital letter A with diaeresis */
5.79 - "Å", "Å", "A", /* latin capital letter A with ring above */
5.80 - "Æ", "Æ", "AE", /* latin capital letter AE */
5.81 - "Ç", "Ç", "C", /* latin capital letter C with cedilla */
5.82 - "È", "È", "E", /* latin capital letter E with grave */
5.83 - "É", "É", "E", /* latin capital letter E with acute */
5.84 - "Ê", "Ê", "E", /* latin capital letter E with circumflex */
5.85 - "Ë", "Ë", "E", /* latin capital letter E with diaeresis */
5.86 - "Ì", "Ì", "I", /* latin capital letter I with grave */
5.87 - "Í", "Í", "I", /* latin capital letter I with acute */
5.88 - "Î", "Î", "I", /* latin capital letter I with circumflex */
5.89 - "Ï", "Ï", "I", /* latin capital letter I with diaeresis */
5.90 - "Ð", "Ð", "E", /* latin capital letter ETH */
5.91 - "Ñ", "Ñ", "N", /* latin capital letter N with tilde */
5.92 - "Ò", "Ò", "O", /* latin capital letter O with grave */
5.93 - "Ó", "Ó", "O", /* latin capital letter O with acute */
5.94 - "Ô", "Ô", "O", /* latin capital letter O with circumflex */
5.95 - "Õ", "Õ", "O", /* latin capital letter O with tilde */
5.96 - "Ö", "Ö", "O", /* latin capital letter O with diaeresis */
5.97 - "×", "×", "*", /* multiplication sign */
5.98 - "Ø", "Ø", "O", /* latin capital letter O with stroke */
5.99 - "Ù", "Ù", "U", /* latin capital letter U with grave */
5.100 - "Ú", "Ú", "U", /* latin capital letter U with acute */
5.101 - "Û", "Û", "U", /* latin capital letter U with circumflex */
5.102 - "Ü", "Ü", "U", /* latin capital letter U with diaeresis */
5.103 - "Ý", "Ý", "Y", /* latin capital letter Y with acute */
5.104 - "Þ", "Þ", "TH", /* latin capital letter THORN */
5.105 - "ß", "ß", "sz", /* latin small letter sharp s = ess-zed */
5.106 - "à", "à", "a", /* latin small letter a with grave */
5.107 - "á", "á", "a", /* latin small letter a with acute */
5.108 - "â", "â", "a", /* latin small letter a with circumflex */
5.109 - "ã", "ã", "a", /* latin small letter a with tilde */
5.110 - "ä", "ä", "a", /* latin small letter a with diaeresis */
5.111 - "å", "å", "a", /* latin small letter a with ring above */
5.112 - "æ", "æ", "ae", /* latin small letter ae */
5.113 - "ç", "ç", "c", /* latin small letter c with cedilla */
5.114 - "è", "è", "e", /* latin small letter e with grave */
5.115 - "é", "é", "e", /* latin small letter e with acute */
5.116 - "ê", "ê", "e", /* latin small letter e with circumflex */
5.117 - "ë", "ë", "e", /* latin small letter e with diaeresis */
5.118 - "ì", "ì", "i", /* latin small letter i with grave */
5.119 - "í", "í", "i", /* latin small letter i with acute */
5.120 - "î", "î", "i", /* latin small letter i with circumflex */
5.121 - "ï", "ï", "i", /* latin small letter i with diaeresis */
5.122 - "ð", "ð", "eth", /* latin small letter eth */
5.123 - "ñ", "ñ", "n", /* latin small letter n with tilde */
5.124 - "ò", "ò", "o", /* latin small letter o with grave */
5.125 - "ó", "ó", "o", /* latin small letter o with acute */
5.126 - "ô", "ô", "o", /* latin small letter o with circumflex */
5.127 - "õ", "õ", "o", /* latin small letter o with tilde */
5.128 - "ö", "ö", "o", /* latin small letter o with diaeresis */
5.129 - "÷", "÷", "/", /* division sign */
5.130 - "ø", "ø", "o", /* latin small letter o with stroke */
5.131 - "ù", "ù", "u", /* latin small letter u with grave */
5.132 - "ú", "ú", "u", /* latin small letter u with acute */
5.133 - "û", "û", "u", /* latin small letter u with circumflex */
5.134 - "ü", "ü", "u", /* latin small letter u with diaeresis */
5.135 - "ý", "ý", "y", /* latin small letter y with acute */
5.136 - "þ", "þ", "th", /* latin small letter thorn */
5.137 - "ÿ", "ÿ", "y", /* latin small letter y with diaeresis */
5.138 - "", ""
5.139 -};
5.140 -
5.141 /* special characters */
5.142 #define CHAR_SPACE 32
5.143 #define CHAR_TAB 9
5.144 @@ -352,7 +227,7 @@
5.145 char *linehasmarkup(char *);
5.146 char *losemarkup(char *);
5.147 gboolean tagcomp(const char *,const char *);
5.148 -char *loseentities(char *);
5.149 +void loseentities(char *);
5.150 gboolean isroman(const char *);
5.151 void postprocess_for_DP(char *);
5.152
5.153 @@ -2916,6 +2791,8 @@
5.154 g_tree_unref(qperiod);
5.155 g_set_print_handler(NULL);
5.156 print_as_windows_1252(NULL);
5.157 + if (pswit[MARKUP_SWITCH])
5.158 + loseentities(NULL);
5.159 }
5.160
5.161 /*
5.162 @@ -3210,8 +3087,7 @@
5.163 {
5.164 while (losemarkup(theline))
5.165 ;
5.166 - while (loseentities(theline))
5.167 - ;
5.168 + loseentities(theline);
5.169 }
5.170
5.171 char *losemarkup(char *theline)
5.172 @@ -3233,37 +3109,86 @@
5.173 return NULL;
5.174 }
5.175
5.176 -char *loseentities(char *theline)
5.177 +void loseentities(char *theline)
5.178 {
5.179 int i;
5.180 - char *s,*t;
5.181 - if (!*theline)
5.182 - return NULL;
5.183 - for (i=0;*entities[i].htmlent;i++)
5.184 + gsize nb;
5.185 + char *amp,*scolon;
5.186 + gchar *s,*t;
5.187 + gunichar c;
5.188 + static GTree *entities=NULL;
5.189 + static GIConv translit=(GIConv)-1,to_utf8=(GIConv)-1;
5.190 + if (!theline)
5.191 {
5.192 - s=strstr(theline,entities[i].htmlent);
5.193 - if (s)
5.194 + if (entities)
5.195 + g_tree_destroy(entities);
5.196 + entities=NULL;
5.197 + if (translit!=(GIConv)-1)
5.198 + g_iconv_close(translit);
5.199 + translit=(GIConv)-1;
5.200 + if (to_utf8!=(GIConv)-1)
5.201 + g_iconv_close(to_utf8);
5.202 + to_utf8=(GIConv)-1;
5.203 + return;
5.204 + }
5.205 + if (!*theline)
5.206 + return;
5.207 + if (!entities)
5.208 + {
5.209 + entities=g_tree_new((GCompareFunc)strcmp);
5.210 + for(i=0;i<G_N_ELEMENTS(HTMLentities);i++)
5.211 + g_tree_insert(entities,HTMLentities[i].name,
5.212 + GUINT_TO_POINTER(HTMLentities[i].c));
5.213 + }
5.214 + if (translit==(GIConv)-1)
5.215 + translit=g_iconv_open("ISO_8859-1//TRANSLIT","UTF-8");
5.216 + if (to_utf8==(GIConv)-1)
5.217 + to_utf8=g_iconv_open("UTF-8","ISO_8859-1");
5.218 + while((amp=strchr(theline,'&')))
5.219 + {
5.220 + /* c stays 0 unless a well-formed entity is recognised below. */
5.221 + c=0;
5.222 + scolon=strchr(amp,';');
5.223 + if (scolon)
5.224 {
5.225 - t=g_strdup(s+strlen(entities[i].htmlent));
5.226 - strcpy(s,entities[i].textent);
5.227 - strcat(s,t);
5.228 - g_free(t);
5.229 - return theline;
5.230 + if (amp[1]=='#')
5.231 + {
5.232 + if (amp+2+strspn(amp+2,"0123456789")==scolon)
5.233 + c=strtol(amp+2,NULL,10);
5.234 + else if (amp[2]=='x' &&
5.235 + amp+3+strspn(amp+3,"0123456789abcdefABCDEF")==scolon)
5.236 + c=strtol(amp+3,NULL,16);
5.237 + }
5.238 + else
5.239 + {
5.240 + s=g_strndup(amp+1,scolon-(amp+1));
5.241 + c=GPOINTER_TO_UINT(g_tree_lookup(entities,s));
5.242 + g_free(s);
5.243 + }
5.244 }
5.245 + if (c)
5.246 + {
5.247 + theline=amp;
5.248 + if (c<128 || (c>=192 && c<=255)) /* An ISO-8859-1 character */
5.249 + theline+=g_unichar_to_utf8(c,theline);
5.250 + else
5.251 + {
5.252 + s=g_malloc(6);
5.253 + nb=g_unichar_to_utf8(c,s);
5.254 + t=g_convert_with_iconv(s,nb,translit,NULL,&nb,NULL);
5.255 + g_free(s);
5.256 + s=g_convert_with_iconv(t,nb,to_utf8,NULL,&nb,NULL);
5.257 + g_free(t);
5.258 + memcpy(theline,s,nb);
5.259 + g_free(s);
5.260 + theline+=nb;
5.261 + }
5.262 + memmove(theline,g_utf8_next_char(scolon),
5.263 + strlen(g_utf8_next_char(scolon))+1);
5.264 + }
5.265 + else
5.266 + theline=g_utf8_next_char(amp);
5.267 }
5.268 - for (i=0;*entities[i].htmlnum;i++)
5.269 - {
5.270 - s=strstr(theline,entities[i].htmlnum);
5.271 - if (s)
5.272 - {
5.273 - t=g_strdup(s+strlen(entities[i].htmlnum));
5.274 - strcpy(s,entities[i].textent);
5.275 - strcat(s,t);
5.276 - g_free(t);
5.277 - return theline;
5.278 - }
5.279 - }
5.280 - return NULL;
5.281 }
5.282
5.283 gboolean tagcomp(const char *strin,const char *basetag)
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
6.2 +++ b/bookloupe/gen-html-entities.sh Thu May 30 17:16:37 2013 +0100
6.3 @@ -0,0 +1,36 @@
6.4 +#!/bin/sh
6.5 +# Usage: gen-html-entities.sh ENTITY-FILE... (the C table is written to stdout)
6.6 +header()
6.7 +{
6.8 +cat << EOF
6.9 +/*
6.10 + * Automatically generated by gen-html-entities. Do not edit by hand.
6.11 + */
6.12 +
6.13 +struct {
6.14 + char *name;
6.15 + gunichar c;
6.16 +} HTMLentities[] = {
6.17 +EOF
6.18 +}
6.19 +
6.20 +parse_ent_file()
6.21 +{
6.22 + awk '/<!ENTITY .* CDATA/ { \
6.23 + c=substr($4,4,length($4)-5);
6.24 + print " { \"" $2 "\", " c, "}," } \
6.25 + ' "$1"
6.26 +}
6.27 +
6.28 +footer()
6.29 +{
6.30 +cat << EOF
6.31 +};
6.32 +EOF
6.33 +}
6.34 +
6.35 +header
6.36 +for file; do
6.37 + parse_ent_file "$file"
6.38 +done
6.39 +footer
7.1 --- a/configure.ac Thu May 30 07:31:24 2013 +0100
7.2 +++ b/configure.ac Thu May 30 17:16:37 2013 +0100
7.3 @@ -13,7 +13,7 @@
7.4 test/compatibility/Makefile
7.5 doc/Makefile
7.6 ])
7.7 -AM_INIT_AUTOMAKE(no-define,1.11)
7.8 +AM_INIT_AUTOMAKE([no-define 1.11])
7.9 AM_SILENT_RULES([yes])
7.10 AC_CANONICAL_HOST
7.11