Index: openacs-4/packages/acs-tcl/tcl/utilities-procs.tcl =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/acs-tcl/tcl/utilities-procs.tcl,v diff -u -r1.122 -r1.123 --- openacs-4/packages/acs-tcl/tcl/utilities-procs.tcl 15 Mar 2010 00:09:00 -0000 1.122 +++ openacs-4/packages/acs-tcl/tcl/utilities-procs.tcl 23 Jul 2010 01:13:13 -0000 1.123 @@ -1459,6 +1459,8 @@ } set length [ns_set iget $headers content-length] if { "" eq $length } {set length -1} + set type [ns_set iget $headers content-type] + set_encoding $type $rfd set err [catch { while 1 { set buf [_ns_http_read $timeout $rfd $length] @@ -1529,6 +1531,73 @@ return [expr { [info exists var] && $var eq $value } ] } +ad_proc -private set_encoding { + {-text_translation {auto binary}} + content_type + channel +} { +

The ad_http* and util_http* machineries depend on the + AOLserver/NaviServer socket I/O layer provided by [ns_sockopen]. + This proc allows you to request Tcl encoding filtering for + ns_sockopen channels (i.e., the read and write channels return by + [ns_sockopen]), to be applied right before performing socket I/O + operations (i.e., reads).

+ +

The major task is to resolve the corresponding Tcl encoding + (e.g.: ascii) for a given IANA/MIME charset name (or alias; e.g.: + US-ASCII); the main resolution scheme is implemented by + [ns_encodingfortype] which is available bother under AOLserver and + NaviServer (see tcl/charsets.tcl). The mappings between Tcl encoding + names (as shown by [encoding names]) and IANA/MIME charset names + (i.e., names and aliases in the sense of IANA's + charater sets registry) is provided by:

+ + + +

[ns_encodingfortype] introduces several levels of precedence + when resolving the actual IANA/MIME charset and the corresponding + Tcl encoding to use:

+ +
    +
  1. The "content_type" string contains a charset specification, + e.g.: "text/xml; charset=UTF-8". This spec fragment takes the + highest precedence.
  2. + +
  3. The "content_type" string points to a "text/*" media subtype, + but does not specify a charset (e.g., "text/xml"). In this case, the + charset defined by ns/parameters/OutputCharset (see config.tcl) + applies. If this parameter is missing, or [ns_encodingfortype] fails + to resolve any Tcl encoding name, the general default is + "iso-8859-1" (see tcl/charsets.tcl; this follows from RFC 2616 (HTTP 1.1); + Section 3.7.1).
  4. + +
  5. If neither case 1 or case 2 become effective, the encoding is + resolved to "binary".
  6. +
+ + + + @author stefan.sobernig@wu.ac.at +} { + set trl [expr {[string match "text/*" $content_type] ? $text_translation : "binary"}] + set enc [ns_encodingfortype $content_type] + if {$enc eq ""} { + set enc [expr {[string match "text/*" $content_type] ? "iso8859-1" : "binary"}] + ns_log debug "--- Resolving a Tcl encoding for the CONTENT-TYPE '$content_type' failed; falling back to '$enc'." + } + fconfigure $channel -translation $trl -encoding $enc +} + ad_proc -public ad_httpget { -url {-headers ""} @@ -1573,8 +1642,11 @@ close $rfd } else { set length [ns_set iget $headers content-length] - if { "" eq $length } {set length -1} - + if { $length eq "" } {set length -1} + + set type [ns_set iget $headers content-type] + set_encoding $type $rfd + set err [catch { while 1 { set buf [_ns_http_read $timeout $rfd $length] @@ -3344,6 +3416,8 @@ set status [lindex $response 1] set length [ns_set iget $headers content-length] if { "" eq $length } { set length -1 } + set type [ns_set iget $headers content-type] + set_encoding $type $rfd set err [catch { while 1 { set buf [_ns_http_read $timeout $rfd $length]