Index: openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl,v diff -u -r1.109.2.54 -r1.109.2.55 --- openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl 8 Jul 2024 02:07:54 -0000 1.109.2.54 +++ openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl 8 Jul 2024 15:17:50 -0000 1.109.2.55 @@ -2409,54 +2409,39 @@ } { array set entities { lt < gt > quot \" ob \{ cb \} amp & } - # Expand HTML entities on the value - for { set i [string first & $html] } { $i != -1 } { set i [string first & $html $i] } { + set mappings [list] - set match_p 0 - switch -regexp -- [string index $html $i+1] { - \# { - switch -regexp -- [string index $html $i+2] { - [xX] { - regexp -indices -start [expr {$i+3}] {[0-9a-fA-F]*} $html hex_idx - set hex [string range $html [lindex $hex_idx 0] [lindex $hex_idx 1]] - set html [string replace $html $i [lindex $hex_idx 1] \ - [subst -nocommands -novariables "\\x$hex"]] - set match_p 1 - } - [0-9] { - regexp -indices -start [expr {$i+2}] {[0-9]*} $html dec_idx - set dec [string range $html [lindex $dec_idx 0] [lindex $dec_idx 1]] - # $dec might contain leading 0s. Since format evaluates $dec as expr - # leading 0s cause octal interpretation and therefore errors on e.g. & - set dec [string trimleft $dec 0] - if {$dec eq ""} { - set dec 0 - } - set html [string replace $html $i [lindex $dec_idx 1] \ - [format "%c" $dec]] - set match_p 1 - } + # + # Extract all entities from the text. The semicolon is optional. + # + set parsed_entities [regexp -all -inline {&([a-zA-Z]+|#[0-9]+|#x[a-zA-Z0-9]+);?} $html] + + foreach {match entity} $parsed_entities { + if {[string index $entity 0] eq "#"} { + if {[string index $entity 1] eq "x"} { + # + # Entity as hexadecimal + # + set code [scan [string range $entity 2 end] %x] + } else { + # + # Entity as decimal character code + # + set code [string trimleft [string range $entity 1 end] 0] + if {$code eq ""} { + set code 0 } } - [a-zA-Z] { - if { [regexp -indices -start $i {\A&([^\s;]+)} $html match entity_idx] } { - set entity [string tolower [string range $html [lindex $entity_idx 0] [lindex $entity_idx 1]]] - if { [info exists entities($entity)] } { - set html [string replace $html $i [lindex $match 1] $entities($entity)] - } - set match_p 1 - } - } + lappend mappings $match [format "%c" $code] + } elseif {[info exists entities($entity)]} { + # + # Entity by name. Only some are supported. + # + lappend mappings $match $entities($entity) } } - incr i - if { $match_p } { - # remove trailing semicolon - if {[string index $html $i] eq ";"} { - set html [string replace $html $i $i] - } - } - return $html + + return [string map $mappings $html] }