| |
238 |
238 |
return $html |
| |
239 |
239 |
} |
| |
240 |
240 |
|
| |
241 |
241 |
ad_proc -public feed_parser::parse_feed { |
| |
242 |
242 |
-xml:required |
| |
243 |
243 |
{-autodiscover:boolean 1} |
| |
244 |
244 |
} { |
| |
245 |
245 |
Parse a string believed to be a syndication feed. |
| |
246 |
246 |
|
| |
247 |
247 |
@author Guan Yang (guan@unicast.org) |
| |
248 |
248 |
@creation-date 2003-12-28 |
| |
249 |
249 |
@param xml A string containing an XML document. |
| |
250 |
250 |
@param autodiscover If true, this procedure will, if the string turns out at |
| |
251 |
251 |
first glance not to be an XML document, treat it as an HTML |
| |
252 |
252 |
document and attempt to extract an RSS autodiscovery element. |
| |
253 |
253 |
If such an element is found, the URL will be retrieved using |
| |
254 |
254 |
ad_httpget and this procedure will be applied to the content |
| |
255 |
255 |
of that URL. |
| |
256 |
256 |
@return A Tcl array-list data structure. |
| |
257 |
257 |
} { |
| |
258 |
|
# Unless we have explicit encoding information, we'll assume UTF-8 |
| |
259 |
|
if { [regexp {^[[:space:]]*<\?xml[^>]+encoding="([^"]*)"} $xml match encoding] } { |
| |
260 |
|
set encoding [string tolower $encoding] |
| |
261 |
|
set tcl_encoding [ns_encodingforcharset $encoding] |
| |
262 |
|
if { $tcl_encoding ne "" } { |
| |
263 |
|
set xml [encoding convertfrom $tcl_encoding $xml] |
| |
264 |
|
} |
| |
265 |
|
} |
| |
266 |
|
|
| |
267 |
258 |
# Prefill these slots for errors |
| |
268 |
259 |
set result(channel) "" |
| |
269 |
260 |
set result(items) "" |
| |
270 |
261 |
|
| |
271 |
262 |
if { [catch { |
| |
272 |
|
# Pre-process the doc: replace a leading XML declaration with a bare version="1.0" one (drops any encoding attribute) |
| |
273 |
|
regsub {^<\?xml [^\?]+\?>} $xml {<?xml version="1.0"?>} xml |
| |
274 |
263 |
set doc [dom parse $xml] |
| |
275 |
264 |
set doc_node [$doc documentElement] |
| |
276 |
265 |
set node_name [$doc_node nodeName] |
| |
277 |
266 |
|
| |
278 |
267 |
# feed is the doc-node name for Atom feeds |
| |
279 |
268 |
if { [lsearch {rdf RDF rdf:RDF rss feed} $node_name] == -1 } { |
| |
280 |
269 |
ns_log Debug "feed_parser::parse_feed: doc node name is not rdf, RDF, rdf:RDF or rss" |
| |
281 |
270 |
set rss_p 0 |
| |
282 |
271 |
} else { |
| |
283 |
272 |
set rss_p 1 |
| |
284 |
273 |
} |
| |
285 |
274 |
} errmsg] } { |
| |
286 |
275 |
ns_log Debug "feed_parser::parse_feed: error in initial itdom parse, errmsg = $errmsg" |
| |
287 |
276 |
set rss_p 0 |
| |
288 |
277 |
} |
| |
289 |
278 |
|
| |
290 |
279 |
if { !$rss_p } { |
| |
291 |
280 |
# parse failed or the root element is not a known feed type, let's try autodiscovery |
| |
292 |
281 |
ns_log Debug "feed_parser::parse_feed: not valid xml, we'll try autodiscovery" |
| |
293 |
282 |
|