antoniop
committed
on 24 Jul 18
Trim whitespace
openacs-4/.../tcl/feed-parser-procs.tcl (+0 -11)
238 238     return $html
239 239 }
240 240
241 241 ad_proc -public feed_parser::parse_feed {
242 242     -xml:required
243 243     {-autodiscover:boolean 1}
244 244 } {
245 245     Parse a string believed to be a syndication feed.
246 246
247 247     @author Guan Yang (guan@unicast.org)
248 248     @creation-date 2003-12-28
249 249     @param xml A string containing an XML document.
250 250     @param autodiscover If true, this procedure will, if the string turns out at
251 251                 first glance not to be an XML document, treat it as an HTML
252 252                 document and attempt to extract an RSS autodiscovery element.
253 253                 If such an element is found, the URL will be retrieved using
254 254                 ad_httpget and this procedure will be applied to the content
255 255                 of that URL.
256 256     @return A Tcl array-list data structure.
257 257 } {
258       # Unless we have explicit encoding information, we'll assume UTF-8
259       if { [regexp {^[[:space:]]*<\?xml[^>]+encoding="([^"]*)"} $xml match encoding] } {
260           set encoding [string tolower $encoding]
261           set tcl_encoding [ns_encodingforcharset $encoding]
262           if { $tcl_encoding ne "" } {
263               set xml [encoding convertfrom $tcl_encoding $xml]
264           }
265       }
266  
267 258     # Prefill these slots for errors
268 259     set result(channel) ""
269 260     set result(items) ""
270 261
271 262     if { [catch {
272           # Pre-process the doc and remove any processing instruction
273           regsub {^<\?xml [^\?]+\?>} $xml {<?xml version="1.0"?>} xml
274 263         set doc [dom parse $xml]
275 264         set doc_node [$doc documentElement]
276 265         set node_name [$doc_node nodeName]
277 266
278 267         # feed is the doc-node name for Atom feeds
279 268         if { [lsearch {rdf RDF rdf:RDF rss feed} $node_name] == -1 } {
280 269             ns_log Debug "feed_parser::parse_feed: doc node name is not rdf, RDF, rdf:RDF or rss"
281 270             set rss_p 0
282 271         } else {
283 272             set rss_p 1
284 273         }
285 274     } errmsg] } {
286 275         ns_log Debug "feed_parser::parse_feed: error in initial itdom parse, errmsg = $errmsg"
287 276         set rss_p 0
288 277     }
289 278
290 279     if { !$rss_p } {
291 280         # not valid xml, let's try autodiscovery
292 281         ns_log Debug "feed_parser::parse_feed: not valid xml, we'll try autodiscovery"
293 282