# This module contains some low-level character-handling stuff.

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Look up a single HTML escape-sequence symbol ("entity") in AStbl,
# or check to see if it's numeric.  The return value is the single
# character corresponding to the symbol.
#
#   s        the entity body, without the leading "&" and trailing ";"
#            (for example "lt" or "65").
#   returns  the one-character string for the entity, or the literal
#            text "&s;" when s is neither a key of AStbl nor something
#            that scans as a decimal number.
#
# Reads the globals V (diagnostics are printed when V>4 on a table hit
# and when V>0 on an unknown symbol), me (prefix of those messages) and
# AStbl (name -> decimal code).
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
proc Chr s {
    global V me AStbl
    if {[info exists AStbl($s)]} {
        set val $AStbl($s)
        if {$V>4} {puts "$me/Chr: AStbl([DSP $s]) is $val"}
    } elseif {[scan $s {%d} val] == 1} {
        # Decimal code: scan has stored it in val.  The result is compared
        # with 1 because scan yields -1 (which is "true") on empty input.
    } else {
        if {$V>0} {puts "$me/Chr: ### &$s; not known ###"}
        # Quoted so the ";" is part of the value instead of ending the command.
        return "&$s;"
    }
    return [format {%c} $val]
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Scan x for HTML-encoded characters, and replace them with the chars
# that they represent.  There are two sorts of encodings that we look
# for here: "&foo;" is a symbolic encoding, and is looked up in the
# AStbl array.  "&#ddd;" (decimal) or "&#xhh;" (hexadecimal) is a
# numeric encoding, and is converted to the character with that code.
# Anything that looks like an entity but cannot be decoded is copied
# through unchanged.
#
#   x        the text to decode.
#   returns  x with every recognised entity replaced.
#
# The pattern's leading (.*) is greedy, so each pass peels off the LAST
# entity in x; decoded text is pushed onto the front of v and is never
# rescanned, so "&amp;lt;" decodes to "&lt;" and not to "<".
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
proc Hdecode x {
    global AStbl
    set v {}
    # The condition is braced so that regexp is re-run on every pass; an
    # unbraced [regexp ...] is substituted once and the loop never ends.
    while {[regexp {^(.*)&(#?)([A-Za-z0-9]+);(.*)$} $x {} x y s z]} {
        if {[string length $y] > 0} {
            # Numeric reference: decimal, hexadecimal, or malformed.
            if {[regexp {^[0-9]+$} $s]} {
                scan $s {%d} val
                set c [format {%c} $val]
            } elseif {[regexp {^[xX]([0-9A-Fa-f]+)$} $s {} hex]} {
                scan $hex {%x} val
                set c [format {%c} $val]
            } else {
                set c "&#$s;"
            }
        } elseif {[info exists AStbl($s)]} {
            set c [format {%c} $AStbl($s)]
        } else {
            set c "&$s;"
        }
        set v "$c$z$v"
    }
    return "$x$v"
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here is our table of symbolic char names.  Note the decimal values.
# Note also that some of these are case sensitive in the first char,
# to distinguish marked upper- and lower- case letters.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# AStbl maps each entity name to its decimal character code.  A few
# codes are listed under more than one spelling (AElig/Aelig, ETH/Eth,
# THORN/Thorn).  Do not put "#" comments inside the braces below: the
# braced text is the key/value list itself, not a script.
global AStbl
array set AStbl {
    sp       32   quot     34   amp      38   lt       60   gt       62
    nbsp    160   iexcl   161   cent    162   pound   163
    curren  164   yen     165   brvbar  166   sect    167
    uml     168   copy    169   ordf    170   laquo   171
    not     172   shy     173   reg     174   macr    175
    deg     176   plusmn  177   sup2    178   sup3    179
    acute   180   micro   181   para    182   middot  183
    cedil   184   sup1    185   ordm    186   raquo   187
    frac14  188   frac12  189   frac34  190   iquest  191
    Agrave  192   Aacute  193   Acirc   194   Atilde  195
    Auml    196   Aring   197   AElig   198   Aelig   198
    Ccedil  199   Egrave  200   Eacute  201   Ecirc   202
    Euml    203   Igrave  204   Iacute  205   Icirc   206
    Iuml    207   ETH     208   Eth     208   Ntilde  209
    Ograve  210   Oacute  211   Ocirc   212   Otilde  213
    Ouml    214   times   215   Oslash  216   Ugrave  217
    Uacute  218   Ucirc   219   Uuml    220   Yacute  221
    THORN   222   Thorn   222   szlig   223   agrave  224
    aacute  225   acirc   226   atilde  227   auml    228
    aring   229   aelig   230   ccedil  231   egrave  232
    eacute  233   ecirc   234   euml    235   igrave  236
    iacute  237   icirc   238   iuml    239   eth     240
    ntilde  241   ograve  242   oacute  243   ocirc   244
    otilde  245   ouml    246   divide  247   oslash  248
    ugrave  249   uacute  250   ucirc   251   uuml    252
    yacute  253   thorn   254   yuml    255
}