Index: openacs-4/packages/news-aggregator/news-aggregator.info =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/news-aggregator.info,v diff -u -r1.5 -r1.6 --- openacs-4/packages/news-aggregator/news-aggregator.info 10 Nov 2003 16:36:31 -0000 1.5 +++ openacs-4/packages/news-aggregator/news-aggregator.info 20 Mar 2004 11:12:28 -0000 1.6 @@ -6,20 +6,26 @@ News Aggregators f f - - - Simon Carstensen + na + + + Guan Yang + Simon Carstensen Read news sources from your website. - 2003-11-10 + 2004-03-20 The news aggregator periodically reads a set of news sources, in one of several XML-based formats, finds the new bits, and displays them in reverse-chronological order on a single page. - + + + + + Fisheye: Tag 1.3 refers to a dead (removed) revision in file `openacs-4/packages/news-aggregator/sql/oracle/news-aggregator-create.sql'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.4 refers to a dead (removed) revision in file `openacs-4/packages/news-aggregator/sql/oracle/news-aggregator-drop.sql'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.4 refers to a dead (removed) revision in file `openacs-4/packages/news-aggregator/sql/oracle/news-aggregator-package-create.sql'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.3 refers to a dead (removed) revision in file `openacs-4/packages/news-aggregator/sql/oracle/news-aggregator-package-drop.sql'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.2 refers to a dead (removed) revision in file `openacs-4/packages/news-aggregator/sql/oracle/upgrade/upgrade-0.1d-0.2d.sql'. Fisheye: No comparison available. Pass `N' to diff? Index: openacs-4/packages/news-aggregator/sql/postgresql/news-aggregator-create.sql =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/sql/postgresql/news-aggregator-create.sql,v diff -u -r1.3 -r1.4 --- openacs-4/packages/news-aggregator/sql/postgresql/news-aggregator-create.sql 9 Nov 2003 10:53:56 -0000 1.3 +++ openacs-4/packages/news-aggregator/sql/postgresql/news-aggregator-create.sql 20 Mar 2004 11:12:28 -0000 1.4 @@ -1,192 +1,8 @@ +-- +-- PostgreSQL Data model for the News Aggregator package +-- +-- @author Simon Carstensen (simon@bcuni.net) +-- @creation-date 2003-06-26 -select acs_object_type__create_type ( - 'na_source', -- object_type - 'Sources', -- pretty_name - 'Sources', -- pretty_plural - 'acs_object', -- supertype - 'na_sources', -- table_name - 'source_id', -- id_column - null, -- package_name - 'f', -- abstract_p - null, -- type_extension_table - 'na_source.title' -- name_method -); - - -create table na_sources ( - source_id integer - constraint na_sources_source_id_fk - references acs_objects(object_id) - constraint na_sources_source_id_pk - primary key, - package_id integer - constraint na_sources_package_id_fk - references apm_packages(package_id), - owner_id integer - constraint na_sources_owner_id_fk - references users(user_id), - feed_url varchar(100) - constraint na_sources_feed_url_nn - not null, - link varchar(100), - title varchar(100), - description varchar(255), - updates integer, - last_scanned timestamptz, - last_modified varchar(30) -); - -create table na_items ( - item_id integer - default nextval('na_items_item_id_seq') - primary key, - source_id integer - constraint na_items_source_id_fk - references na_sources (source_id), - link varchar(255), - title varchar(255), - description text, - creation_date timestamptz, - deleted_p boolean -); - -create sequence na_items_item_id_seq; - -create or replace function na_source__new ( - integer, -- source_id - integer, -- package_id - integer, -- owner_id - varchar, -- feed_url - varchar, -- link - varchar, -- title - varchar, -- description - integer, -- updates - timestamptz, -- last_scanned - varchar, -- last_modified - varchar, -- object_type - integer, -- creation_user - varchar -- creation_ip -) returns integer as ' -declare - p_source_id alias for $1; - p_package_id alias for $2; - p_owner_id alias for $3; - p_feed_url alias for $4; - p_link alias for $5; - p_title alias for $6; - p_description alias for $7; - p_updates alias for $8; - p_last_scanned alias for $9; - p_last_modified alias for $10; - p_object_type alias for $11; - p_creation_user alias for $12; - p_creation_ip alias for $13; - v_source_id integer; -begin - v_source_id := acs_object__new ( - p_source_id, - p_object_type, - current_timestamp, - p_creation_user, - p_creation_ip, - p_package_id - ); - - insert into na_sources ( - source_id, - package_id, - owner_id, - feed_url, - link, - title, - description, - updates, - last_scanned, - last_modified - ) values ( - v_source_id, - p_package_id, - p_owner_id, - p_feed_url, - p_link, - p_title, - p_description, - p_updates, - p_last_scanned, - p_last_modified - ); - - PERFORM acs_permission__grant_permission( - v_source_id, - p_owner_id, - ''admin'' - ); - - return v_source_id; - -end;' language 'plpgsql'; - -create or replace function na_source__delete ( - integer -- source_id -) -returns integer as ' -declare - p_source_id alias for $1; -begin - - delete from na_items - where source_id = p_source_id; - - delete from acs_permissions - where object_id = p_source_id; - - delete from na_sources - where source_id = p_source_id; - - raise NOTICE ''Deleting na_source and its belonging items...''; - PERFORM acs_object__delete(p_source_id); - - return 0; - -end;' language 'plpgsql'; - -create or replace function na_item__new ( - integer, -- source_id - varchar, -- link - varchar, -- title - varchar, -- description - timestamptz, -- creation_date - boolean -- deleted_p -) returns integer as ' -declare - p_source_id alias for $1; - p_link alias for $2; - p_title alias for $3; - p_description alias for $4; - p_creation_date alias for $5; - p_deleted_p alias for $6; -begin - - insert into na_items - (source_id, link, title, description, creation_date, deleted_p) - values - (p_source_id, p_link, p_title, p_description, p_creation_date, p_deleted_p); - - return 1; - -end;' language 'plpgsql'; - -create or replace function na_source__title (integer) -returns varchar as ' -declare - p_source_id alias for $1; - v_source_title na_sources.title%TYPE; -begin - select title - into v_source_title - from na_sources - where source_id = p_source_id; - - return v_source_title; -end; -' language 'plpgsql'; +\i news-aggregator-tables-create.sql +\i news-aggregator-packages-create.sql Index: openacs-4/packages/news-aggregator/sql/postgresql/news-aggregator-drop.sql =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/sql/postgresql/news-aggregator-drop.sql,v diff -u -r1.3 -r1.4 --- openacs-4/packages/news-aggregator/sql/postgresql/news-aggregator-drop.sql 9 Nov 2003 10:53:56 -0000 1.3 +++ openacs-4/packages/news-aggregator/sql/postgresql/news-aggregator-drop.sql 20 Mar 2004 11:12:28 -0000 1.4 @@ -1,39 +1,8 @@ ---drop na_item functions -drop function na_item__new (integer,varchar,varchar,varchar,timestamptz,boolean); +-- +-- Drop script for the Media Relationships package +-- +-- @author Simon Carstensen (simon@collaboraid.biz) +-- @creation-date 2003-06-26 ---drop na_item table -drop table na_items; -drop sequence na_items_item_id_seq; - ---drop na_source functions -drop function na_source__new (integer,integer,integer,varchar,varchar,varchar,varchar,integer,timestamptz,varchar,varchar,integer,varchar); -drop function na_source__delete (integer); -drop function na_source__title (integer); - -delete from acs_permissions where object_id in (select source_id from na_sources); - ---drop na_source objects -create function inline_0 () -returns integer as ' -declare - object_rec record; -begin - for object_rec in select object_id from acs_objects where object_type=''na_source'' - loop - perform acs_object__delete( object_rec.object_id ); - end loop; - - return 0; -end;' language 'plpgsql'; - -select inline_0(); -drop function inline_0(); - ---drop na_source table -drop table na_sources; - ---drop na_source type -select acs_object_type__drop_type( - 'na_source', - 't' - ); +\i news-aggregator-packages-drop.sql +\i news-aggregator-tables-drop.sql Fisheye: Tag 1.3 refers to a dead (removed) revision in file `openacs-4/packages/news-aggregator/tcl/news-aggregator-procs-oracle.xql'. Fisheye: No comparison available. Pass `N' to diff? Index: openacs-4/packages/news-aggregator/tcl/news-aggregator-procs-postgresql.xql =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/tcl/Attic/news-aggregator-procs-postgresql.xql,v diff -u -r1.2 -r1.3 --- openacs-4/packages/news-aggregator/tcl/news-aggregator-procs-postgresql.xql 28 Aug 2003 09:41:57 -0000 1.2 +++ openacs-4/packages/news-aggregator/tcl/news-aggregator-procs-postgresql.xql 20 Mar 2004 11:12:29 -0000 1.3 @@ -1,7 +1,6 @@ - postgresql7.1 @@ -55,17 +54,4 @@ - - - update na_sources - set link = :link, - title = :title, - description = :description, - updates = (updates + 1), - last_scanned = now(), - last_modified = :last_modified - where source_id = :source_id - - - Index: openacs-4/packages/news-aggregator/tcl/news-aggregator-procs.tcl =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/tcl/news-aggregator-procs.tcl,v diff -u -r1.6 -r1.7 --- openacs-4/packages/news-aggregator/tcl/news-aggregator-procs.tcl 16 Nov 2003 09:21:20 -0000 1.6 +++ openacs-4/packages/news-aggregator/tcl/news-aggregator-procs.tcl 20 Mar 2004 11:12:29 -0000 1.7 @@ -1,74 +1,35 @@ ad_library { - Procs used by the News Aggregator module. + Procs used by the News Aggregator application. + @author Simon Carstensen (simon@bcuni.net) - @creation-date January 2003 + @author Guan Yang (guan@unicast.org) + @creation-date 2003-06-28 } -ad_proc -public util_httpget_full { - url {headers ""} {timeout 30} {depth 0} -} { - Just like ns_httpget, but first optional argument is an ns_set of - headers to send during the fetch. -} { - if {[incr depth] > 10} { - return -code error "util_httpget: Recursive redirection: $url" - } - ns_log Debug "Getting {$url} {$headers} {$timeout} {$depth}" - set http [ns_httpopen GET $url $headers $timeout] - set rfd [lindex $http 0] - close [lindex $http 1] - set headers [lindex $http 2] - set response [ns_set name $headers] - set status [lindex $response 1] - set last_modified [ns_set iget $headers last-modified] - if {$status == 302} { - set location [ns_set iget $headers location] - if {$location != ""} { - ns_set free $headers - close $rfd - return [util_httpget_full $location {} $timeout $depth] - } - } elseif { $status == 304 } { - # The requested variant has not been modified since the time specified - # A conditional get didn't return anything. - ns_set free $headers - close $rfd - return [list "" $status] - } - set length [ns_set iget $headers content-length] - if [string match "" $length] {set length -1} - set err [catch { - while 1 { - set buf [_ns_http_read $timeout $rfd $length] - append page $buf - if [string match "" $buf] break - if {$length > 0} { - incr length -[string length $buf] - if {$length <= 0} break - } - } - } errMsg] - ns_set free $headers - close $rfd - if $err { - global errorInfo - return -code error -errorinfo $errorInfo $errMsg - } - return [list $page $status $last_modified] - } -ad_proc -public na_check_link { - domain - link +namespace eval news_aggregator {} + + +ad_proc -public news_aggregator::check_link { + {-domain:required} + {-link:required} } { + @author Simon Carstensen +} { regexp {(https?://[^/]+)+} $domain domain regexp {(https?://[^/]+)+} $link link return [string equal $link $domain] } -ad_proc -public na_last_scanned { - diff + +ad_proc -public news_aggregator::last_scanned { + {-diff:required} } { + + Returns the number of hours and minutes since the feed was last updated. + + @author Simon Carstensen +} { if {$diff < 120 && $diff > 60} { set to_return "1 hour and " } elseif {$diff >= 60} { @@ -83,197 +44,318 @@ return $to_return } -ad_proc -public na_get_nodes { - nodes +ad_proc -private news_aggregator::dom_set_child_text { + {-node:required} + {-child:required} } { - foreach node_id $nodes { - switch -- [xml_node_get_name $node_id] { - title { - catch {set title [xml_node_get_content $node_id]} - } - "link" { - catch {set link [xml_node_get_content $node_id]} - } - "guid" { - catch {set guid [xml_node_get_content $node_id]} + If node contains a child node named child, + the variable child is set to the text of that node + in the caller's stack frame. + + @author Guan Yang + @creation-date 2003-07-03 +} { + if { [$node hasChildNodes] } { + set child_nodes [$node selectNodes "*\[local-name()='$child'\]"] + if { [llength $child_nodes] == 1 } { + set child_node [lindex $child_nodes 0] + upvar $child var + set var [$child_node text] + } + } +} + +ad_proc -private news_aggregator::channel_parse { + {-channel_node:required} +} { + Takes a tDOM node which is supposed to represent an RSS + channel, and returns a list with the RSS/RDF elements + of that node. This proc should later be extended to + support Dublin Core elements and other funk. + + @author Guan Yang (guan@unicast.org) + @creation-date 2003-07-03 +} { + set properties [list title link description language copyright lastBuildDate docs generator managingEditor webMaster] + + foreach property $properties { + set $property "" + news_aggregator::dom_set_child_text -node $channel_node -child $property + set channel($property) [set $property] + } + + set channel_name [$channel_node nodeName] + + # Do weird Atom-like stuff + if { [string equal $channel_name "feed"] } { + # link + if { [string equal $link ""] } { + # Link is in a href + set link_node [$channel_node selectNodes {*[local-name()='link' and @rel = 'alternate' and @type = 'text/html']/@href}] + if { [llength $link_node] == 1 } { + set link_node [lindex $link_node 0] + set channel(link) [lindex $link_node 1] } - "description" { - catch {set description [xml_node_get_content $node_id]} + } + + # author + set author_node [$channel_node selectNodes {*[local-name()='author']}] + if { [llength $author_node] == 1 } { + set author_node [lindex $author_node 0] + news_aggregator::dom_set_child_text -node $author_node -child name + news_aggregator::dom_set_child_text -node $author_node -child email + if { [info exists email] && [info exists name] } { + set channel(managingEditor) "$email ($name)" } - } + } + + # tagline + news_aggregator::dom_set_child_text -node $channel_node -child tagline + if { [info exists tagline] } { + set channel(tagline) $tagline + } } - if { ![exists_and_not_null title] } { - set title "" + return [array get channel] +} + +ad_proc -private news_aggregator::items_fetch { + {-doc_node:required} +} { + Takes a tDOM document node which is supposed to be some + form of RSS. Returns a list with the item nodes. Returns + an empty list if none could be found. + + @author Guan Yang (guan@unicast.org) + @creation-date 2003-07-03 +} { + set items [$doc_node selectNodes {//*[local-name()='item' or local-name()='entry']}] + return $items +} + +ad_proc -private news_aggregator::item_parse { + {-item_node:required} +} { + Takes a tDOM node which is supposed to represent an RSS item, + and returns a list with the RSS/RDF elements of that node. + + @author Simon Carstensen + @author Guan Yang (guan@unicast.org) +} { + set title "" + set link "" + set guid "" + set permalink_p false + set description "" + set content_encoded "" + + news_aggregator::dom_set_child_text -node $item_node -child title + news_aggregator::dom_set_child_text -node $item_node -child link + news_aggregator::dom_set_child_text -node $item_node -child guid + news_aggregator::dom_set_child_text -node $item_node -child description + + set maybe_atom_p 0 + + # Try to handle Atom link + if { [string equal $link ""] } { + set link_attr [$item_node selectNodes {*[local-name()='link']/@href}] + if { [llength $link_attr] == 1 } { + set link [lindex [lindex $link_attr 0] 1] + set maybe_atom_p 1 + } } - if { ![exists_and_not_null link] } { - if { [exists_and_not_null guid] } { - set link $guid - } else { - set link "" + + set encoded_nodes [$item_node selectNodes {*[local-name()='encoded' and namespace-uri()='http://purl.org/rss/1.0/modules/content/']}] + if { [llength $encoded_nodes] == 1 } { + set encoded_node [lindex $encoded_nodes 0] + set content_encoded [$encoded_node text] + } + + if { [llength [$item_node selectNodes "*\[local-name()='guid'\]"]] } { + # If guid exists, we assume that it's a permalink + set permalink_p true + } + + # Retrieve isPermaLink attribute + set isPermaLink_nodes [$item_node selectNodes "*\[local-name()='guid'\]/@isPermaLink"] + if { [llength isPermaLink_nodes] == 1} { + set isPermaLink_node [lindex $isPermaLink_nodes 0] + set isPermaLink [lindex $isPermaLink_node 1] + if { [string equal $isPermaLink false] } { + set permalink_p false } } - if { ![exists_and_not_null description] } { - set description "" + + if { [empty_string_p $link] } { + if { [exists_and_not_null guid] } { + set link $guid + set permalink_p true + } elseif { [empty_string_p $guid] && ![string equal $link $guid] } { + set permalink_p true + } } - - return [list $title $link $description] + + # Try to handle Atom guid + if { [empty_string_p $guid] && $maybe_atom_p } { + news_aggregator::dom_set_child_text -node $item_node -child id + if { [info exists id] } { + # We don't really know if it's an URL + set guid $id + if { [util_url_valid_p $id] } { + set permalink_p true + } else { + set permalink_p false + } + } + } + + # For Atom, description is summary, content is content_encoded + if { $maybe_atom_p } { + news_aggregator::dom_set_child_text -node $item_node -child summary + if { [info exists summary] } { + set description $summary + } + + news_aggregator::dom_set_child_text -node $item_node -child content + if { [info exists content] } { + set content_encoded $content + } + } + + #remove unsafe html + set description [news_aggregator::remove_unsafe_html -html $description] + + return [list title $title link $link guid $guid permalink_p $permalink_p description $description content_encoded $content_encoded] } -ad_proc -public na_sort_result { - result +ad_proc -private news_aggregator::external_entity { + base_uri + system_identifier + public_identifier } { + A callback for tDOM to resolve external entities. + + @author Guan Yang + @creation-date 2003-07-03 +} { + return [list string "" ""] +} + +ad_proc -public news_aggregator::sort_result { + {-result:required} +} { + @author Simon Carstensen +} { set sorted [list] for {set i 0} {$i < [llength $result]} {incr i} { lappend sorted [lindex $result end-$i] } return $sorted } -ad_proc -public na_parse { - xml +ad_proc -public news_aggregator::parse { + {-xml:required} } { + The workhorse of news-aggregator: A Very Ugly RSS Parser. + Now also supports Atom and weird formats in between. + + @author Simon Carstensen + @author Guan Yang (guan@unicast.org) +} { if { [catch { - set doc_id [xml_parse -persist $xml] - set nodes [xml_node_get_children [xml_doc_get_first_node $doc_id]] - set channel [na_get_elements $nodes "channel"] - set result [list [na_get_nodes [xml_node_get_children $channel]]] + # Pre-process the doc and remove any processing instruction + regsub {^<\?xml [^\?]+\?>} $xml {} xml + set doc [dom parse $xml] + set doc_node [$doc documentElement] + set node_name [$doc_node nodeName] - set items [na_get_elements $nodes "item"] - set items_sorted [na_sort_result $items] - foreach item $items_sorted { - lappend result [na_get_nodes [xml_node_get_children $item]] - } + # feed is the doc-node name for Atom feeds + if { [lsearch {rdf RDF rdf:RDF rss feed} $node_name] == -1 } { + ns_log Debug "news_aggregator::parse: doc node name is not rdf, RDF, rdf:RDF or rss" + set rss_p 0 + } else { + set rss_p 1 + } + } errmsg] } { + ns_log Debug "news_aggregator::parse: error in initial itdom parse, errmsg = $errmsg" + set rss_p 0 + } + + if { !$rss_p } { + # not valid xml, let's try autodiscovery + ns_log Debug "news_aggregator::parse: not valid xml, we'll try autodiscovery" + + set doc [dom parse -html $xml] + set doc_node [$doc documentElement] + + set link_nodes [$doc_node selectNodes {/html/head/link[@rel='alternate' and @title='RSS' and @type='application/rss+xml']/@href}] + + $doc delete + + if { [llength $link_nodes] == 1} { + set link_node [lindex $link_nodes 0] + set feed_url [lindex $link_node 1] + array set f [ad_httpget -url $feed_url] + return [news_aggregator::parse -xml $f(page)] + } + return 0 + } + + if { [catch { + set doc_name [$doc_node nodeName] + if { [string equal $doc_name "feed"] } { + # It's an Atom feed + set channel [news_aggregator::channel_parse \ + -channel_node $doc_node] + } else { + # It looks RSS/RDF'fy + set channel [news_aggregator::channel_parse \ + -channel_node [$doc_node getElementsByTagName channel]] + } + + set item_nodes [news_aggregator::items_fetch -doc_node $doc_node] + set item_nodes [news_aggregator::sort_result -result $item_nodes] + set items [list] + + foreach item_node $item_nodes { + lappend items [news_aggregator::item_parse -item_node $item_node] + } + $doc delete } err] } { - ns_log Notice "Error parsing RSS feed: $err" return 0 } else { - return $result + return [list channel "$channel" items "$items"] } } -ad_proc -public na_add_source { - feed_url - owner_id - package_id - source_id +ad_proc -public news_aggregator::remove_unsafe_html { + -html:required } { - Parse $feed_url for host_url, title, and description. Then add the source. -} { - if { ![catch {set f [util_httpget_full $feed_url]}] && [string equal 200 [lindex $f 1]] } { - set result [na_parse [lindex $f 0]] - if { ![string equal 0 $result] } { + Make sure we are consuming RSS safely by removing unsafe tags. - set channel [lindex $result 0] - set title [string_truncate -len 245 -no_format -- [lindex $channel 0]] - set link [string_truncate -len 245 -no_format -- [lindex $channel 1]] - set description [string_truncate -len 245 -no_format -- [lindex $channel 2]] - set source_id [db_nextval "acs_object_id_seq"] - set creation_ip [ns_conn peeraddr] - set last_modified [lindex $f 2] - - # check whether the source already exists - if { ![db_0or1row source { *SQL* }] } { - db_exec_plsql add_source { *SQL* } - set items [lrange $result 1 end] - foreach item $items { - set title [string_truncate -len 245 -no_format -- [lindex $item 0]] - set link [string_truncate -len 245 -no_format -- [lindex $item 1]] - set description [lindex $item 2] - db_exec_plsql add_item { *SQL* } - } - } - } - } -} + See http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely.html. -ad_proc -public na_update_source { - owner_id - source_id - feed_url - last_modified + @author Simon Carstensen + @creation-date 2003-07-06 } { - Parse source and then update the source if it has changed. -} { - set header [ns_set create] - ns_set put $header "If-Modified-Since" $last_modified - if { [catch { - set f [util_httpget_full $feed_url $header] }] - } { - return - } - # check the http status code - if { [exists_and_not_null f] && [string equal 200 [lindex $f 1]] } { - set result [na_parse [lindex $f 0]] - set host [lindex [lindex $result 0] 1] - set items [lrange $result 1 end] - foreach item $items { - set title [string_truncate -len 245 -no_format -- [lindex $item 0]] - set link [string_truncate -len 245 -no_format -- [lindex $item 1]] - set description [lindex $item 2] - - # check whether link and description have been set as we - # need these to check against already added items - # also we check whether link is an external or internal URL - # if not, it might occur in other items, and we can't check against it - if { [exists_and_not_null link] && [na_check_link $link $host] } { - set identifier "link" - } elseif { [exists_and_not_null description] } { - set identifier "description" - } else { - set identifier "none" - } - - # check whether the item already exists - # that it has at least a link or description - # and that we're not handling a deleted item - - if { ![string equal "none" $identifier] } { - if {![db_0or1row item { *SQL* }]} { - db_exec_plsql add_item { *SQL* } - set updated_p 1 - } elseif { [string equal f $deleted_p] && ((![string equal $title $item_title]) || (![string equal $description $item_description])) } { - db_dml update_item { *SQL* } - set updated_p 1 - } - } - } - if { [exists_and_not_null updated_p] } { - # one or more items were added/updated - # let's update the rss metadata as well - set channel [lindex $result 0] - set last_modified [lindex $f 2] - set title [string_truncate -len 245 -no_format -- [lindex $channel 0]] - set link [string_truncate -len 245 -no_format -- [lindex $channel 1]] - set description [string_truncate -len 245 -no_format -- [lindex $channel 2]] - - db_dml update_source { *SQL* } - } + set unsafe_tags { + script + embed + object + frameset + frame + iframe + meta + link + style } -} -ad_proc -public na_update_sources { } { - Update sources by a one hour interval. -} { - ns_log Debug "na_update_sources: updating news-aggregator sources" - - db_foreach sources { *SQL* } { - na_update_source $owner_id $source_id $feed_url $last_modified + foreach tag $unsafe_tags { + regsub -all "(<$tag\[^>\]*>(\[^<\]*)?)+" $html {} html } + return $html } - -# schedule hourly updates -ad_schedule_proc 600 na_update_sources - -ad_proc -public na_cleanup_items {} { - Clean up the items that have been retrieved more than two months ago. -} { - db_dml deleted_items { *SQL* } -} - -# schedule daily cleanup of one-week-old deleted items -#ad_schedule_proc 86400 na_cleanup_items Index: openacs-4/packages/news-aggregator/tcl/news-aggregator-procs.xql =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/tcl/Attic/news-aggregator-procs.xql,v diff -u -r1.2 -r1.3 --- openacs-4/packages/news-aggregator/tcl/news-aggregator-procs.xql 28 Aug 2003 09:41:57 -0000 1.2 +++ openacs-4/packages/news-aggregator/tcl/news-aggregator-procs.xql 20 Mar 2004 11:12:29 -0000 1.3 @@ -41,17 +41,4 @@ - - - update na_sources - set link = :link, - title = :title, - description = :description, - updates = (updates + 1), - last_scanned = sysdate, - last_modified = :last_modified - where source_id = :source_id - - - Fisheye: Tag 1.3 refers to a dead (removed) revision in file `openacs-4/packages/news-aggregator/www/index-oracle.xql'. Fisheye: No comparison available. Pass `N' to diff? Index: openacs-4/packages/news-aggregator/www/index-postgresql.xql =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/www/Attic/index-postgresql.xql,v diff -u -r1.2 -r1.3 --- openacs-4/packages/news-aggregator/www/index-postgresql.xql 28 Aug 2003 09:41:57 -0000 1.2 +++ openacs-4/packages/news-aggregator/www/index-postgresql.xql 20 Mar 2004 11:12:29 -0000 1.3 @@ -3,25 +3,34 @@ postgresql7.1 - - - select s.source_id, - s.link, - s.description, - s.title, - to_char(creation_date, 'YYYY-MM-DD HH24:MI:SS') as last_scanned, - to_char(creation_date, 'YYYY-MM-DD HH24') as sort_date, - feed_url, - item_id, - i.title as item_title, - i.link as item_link, - i.description as item_description - from na_sources s, na_items i - where owner_id = :user_id AND deleted_p = '0' - and s.source_id = i.source_id - order by creation_date desc - limit $limit - + + + select item_id + from na_saved_items + where aggregator_id = :aggregator_id + + + + select aggregator_name, + description as aggregator_description, + public_p + from na_aggregators + where aggregator_id = :aggregator_id + + + + + + select + top, bottom + from + na_purges + where + aggregator_id = :aggregator_id + order by top desc, bottom desc + + + Index: openacs-4/packages/news-aggregator/www/index.adp =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/www/index.adp,v diff -u -r1.4 -r1.5 --- openacs-4/packages/news-aggregator/www/index.adp 26 Sep 2003 05:44:59 -0000 1.4 +++ openacs-4/packages/news-aggregator/www/index.adp 20 Mar 2004 11:12:29 -0000 1.5 @@ -1,78 +1,84 @@ + @page_title@ + @context@ -News Aggregator - -@context_bar;noquote@ - -
- - - - - - - - - -
- -
- + +

+ » Manage Subscriptions
+ » Manage This Aggregator
+

+ + @aggregator_description@ + + This page lists the most recent items from the feeds you've subscribed to. + You can hit the Purge button to clean out the page. Clicking the Save icon Save will prevent an item from being purged. + Click on the Post icon Post this item to your Weblog to add the item to your weblog. + +

+
+ +

+ - This page lists the most recent items from the feeds you've subscribed to. + + No items. + + + + + + +
+ + + + + + + + + + + + + + +
+ @items.title@, + updated @items.diff@ + Technorati Cosmos +
+ @items.content@ + + # + + + + + Save + + + Unsave + + Post this item to your Weblog + +
+
+
- - Click on the Post button to post an item to your weblog. - + + + - You can delete stories from this page by checking the items that you want to delete and then clicking - the Delete button. - + +

- - - No feeds. - - - - - - - - - - -
- - - - - - - - - - - - - - - - - -
 @items.title@, @items.diff@, updateRSS
- @items.content;noquote@ - - # - - - G - -  
-
-
-
-
- -

+ Purges: On | Off +

+ + +

+ Purges: On | Off +

+
Index: openacs-4/packages/news-aggregator/www/index.tcl =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/www/index.tcl,v diff -u -r1.2 -r1.3 --- openacs-4/packages/news-aggregator/www/index.tcl 28 Aug 2003 09:41:57 -0000 1.2 +++ openacs-4/packages/news-aggregator/www/index.tcl 20 Mar 2004 11:12:29 -0000 1.3 @@ -1,53 +1,202 @@ -#packages/news-aggregator/index.tcl ad_page_contract { The News Aggregator index page. @author Simon Carstensen simon@bcuni.net - @creation-date Jan 2004 + @creation-date 28-06-2003 } { - item_id:integer,notnull,optional,multiple -} -properties { - context_bar:onevalue + aggregator_id:integer,optional + purge_p:boolean,optional } -ad_maybe_redirect_for_registration - set user_id [ad_conn user_id] +set package_id [ad_conn package_id] +set package_url [ad_conn package_url] -set context_bar [ad_context_bar] +if { ![info exists aggregator_id] } { + # Check whether the user has an aggregator + if { !$user_id } { + ad_redirect_for_registration + ad_script_abort + } -if { ![empty_string_p [ad_parameter "blogger_url"]] } { - set blogger_url "/[ad_parameter "blogger_url"]/entry-edit" -} + set aggregator_id [news_aggregator::aggregator::user_default -user_id $user_id] -ad_form -name items -form { - {delete_submit:text(submit) {label "Delete" } } -} -on_submit { - if {[exists_and_not_null item_id]} { - foreach delete_id $item_id { - db_dml delete_item { *SQL* } - } + if { !$aggregator_id } { + + set user_name [db_string select_user_name {}] + set aggregator_name "${user_name}'s News Aggregator" + + set aggregator_id [news_aggregator::aggregator::new \ + -aggregator_name $aggregator_name \ + -package_id $package_id \ + -public_p 0 \ + -creation_user $user_id \ + -creation_ip [ad_conn peeraddr]] + + #load preinstalled subscriptions into aggregator + news_aggregator::aggregator::load_preinstalled_subscriptions \ + -aggregator_id $aggregator_id \ + -package_id $package_id } - ad_returnredirect "." - ad_script_abort + ad_returnredirect "$aggregator_id" } +set write_p [permission::permission_p \ + -object_id $aggregator_id \ + -privilege write] + +db_1row aggregator_info {} + +#if { $public_p == "f" } { +# permission::require_permission \ +# -object_id $aggregator_id \ +# -privilege write] +#} + +set page_title $aggregator_name +set context [list $page_title] + +set package_url [ad_conn package_url] +set url "$package_url$aggregator_id/" +set graphics_url "${package_url}graphics/" +set return_url [ad_conn url] +set aggregator_url [export_vars -base aggregator { return_url aggregator_id }] + set limit [ad_parameter "number_of_items_shown"] +set sql_limit [expr 7*$limit] -db_multirow -extend { content diff update_url } items items { *SQL* } { +set top 0 +set bottom 1073741824 - set text_only [util_remove_html_tags $item_description] +set counter 0 - if {[exists_and_not_null item_title] && ![string equal -nocase $item_title $text_only] } { - set content "$item_title. $item_description" +if { [info exists purge_p] && $public_p == "f" && $purge_p == "f" } { + set purge_p 0 +} elseif { $public_p == "t" } { + set purge_p 0 +} else { + set purge_p 1 +} + +# We only handle purges if the aggregator is not public +if { $purge_p } { +# set items_purges [db_map items_purges] + set purges [db_list_of_lists purges ""] + set saved_items [db_list saved_items ""] +} else { +# set items_purges "" + set purges [list] + set saved_items [list] +} + +if { $purge_p } { + set items_query [news_aggregator::aggregator::items_sql \ + -aggregator_id $aggregator_id \ + -package_id $package_id \ + -purge_p $purge_p] +} else { + set items_query [news_aggregator::aggregator::items_sql \ + -aggregator_id $aggregator_id \ + -package_id $package_id \ + -purge_p $purge_p \ + -limit_multiple 1] +} + +db_multirow -extend { + content + diff + source_url + save_url + unsave_url + item_blog_url + technorati_url + item_guid_link +} items items $items_query { + # Top is the first item + if { $item_id > $top } { + set top $item_id + } + + set purged_p 0 + # Handle purged items + foreach purge $purges { + if { $item_id <= [lindex $purge 0] && $item_id >= [lindex $purge 1] && + [lsearch $saved_items $item_id] == -1 } { + set purged_p 1 + } + } + if { $purged_p } { + continue + } + + if { [exists_and_not_null content_encoded] } { + if { [exists_and_not_null item_title] } { + set content "$item_title. $content_encoded" + } else { + set content $content_encoded + } } else { - set content $item_description + set text_only [util_remove_html_tags $item_description] + + if { [exists_and_not_null item_title] } { + set content "$item_title. $item_description" + } else { + set content $item_description + } } + + if { $item_permalink_p == "t" } { + set item_guid_link $item_original_guid + } else { + set item_guid_link $item_link + } - set diff [na_last_scanned [expr [expr [clock seconds] - [clock scan $last_scanned]] / 60]] + set diff [news_aggregator::last_scanned -diff [expr [expr [clock seconds] - [clock scan $last_scanned]] / 60]] + set source_url [export_vars -base source {source_id}] + set technorati_url "http://www.technorati.com/cosmos/links.html?url=$link&sub=Get+Link+Cosmos" - set update_url "source-update?[export_vars { user_id source_id feed_url last_scanned}]" + if { [string equal $write_p "1"] } { + if { [lsearch $saved_items $item_id] == -1 } { + set save_url [export_vars -base "${url}item-save" {item_id}] + set unsave_url "" + } else { + set unsave_url [export_vars -base "${url}item-unsave" {item_id}] + set save_url "" + } + set item_blog_url [export_vars -base "${url}item-blog" {item_id}] + } + + if { $item_id < $bottom } { + set bottom $item_id + } + + incr counter + if { $counter > $limit } { + break + } } -ad_return_template +if { [exists_and_not_null top] && [exists_and_not_null bottom] && + $top >= $bottom && $public_p == "f" && + [permission::permission_p -party_id $user_id -object_id $aggregator_id -privilege write] } { + + ad_form -name purge -action "[ad_conn package_url]$aggregator_id/purge" -form { + {purge_top:integer(hidden) + {value $top} + } + {purge_bottom:integer(hidden) + {value $bottom} + } + {purge_submit:text(submit) + {label "Purge this page of news"} + {html {accesskey "p"}} + } + } + set purge 1 +} else { + set purge 0 +} + +set purge_off_url "[ad_conn package_url]$aggregator_id/?purge_p=f" +set purge_on_url "[ad_conn package_url]$aggregator_id" Index: openacs-4/packages/news-aggregator/www/index.xql =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/www/Attic/index.xql,v diff -u -r1.2 -r1.3 --- openacs-4/packages/news-aggregator/www/index.xql 28 Aug 2003 09:41:57 -0000 1.2 +++ openacs-4/packages/news-aggregator/www/index.xql 20 Mar 2004 11:12:29 -0000 1.3 @@ -2,10 +2,13 @@ - - - update na_items set deleted_p = '1' where item_id = :delete_id - - + + + select p.first_names || ' ' || p.last_name as user_name + from users u join + persons p on (u.user_id = p.person_id) + where u.user_id = :user_id + + Fisheye: Tag 1.3 refers to a dead (removed) revision in file `openacs-4/packages/news-aggregator/www/source-update.tcl'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.5 refers to a dead (removed) revision in file `openacs-4/packages/news-aggregator/www/subscriptions-oracle.xql'. Fisheye: No comparison available. Pass `N' to diff? Index: openacs-4/packages/news-aggregator/www/subscriptions-postgresql.xql =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/www/subscriptions-postgresql.xql,v diff -u -r1.2 -r1.3 --- openacs-4/packages/news-aggregator/www/subscriptions-postgresql.xql 28 Aug 2003 09:41:57 -0000 1.2 +++ openacs-4/packages/news-aggregator/www/subscriptions-postgresql.xql 20 Mar 2004 11:12:29 -0000 1.3 @@ -3,13 +3,34 @@ postgresql7.1 - - - select na_source__delete( - :delete_id - ); + + + select s.title, + s.source_id, + s.feed_url, + s.link, + s.description, + s.updates, + to_char(s.last_scanned, 'YYYY-MM-DD HH24:MI') as last_scanned, + to_char(s.last_modified_stamp, 'YYYY-MM-DD HH24:MI') as last_modified + from na_sources s join ( + na_subscriptions su join + na_aggregators a on (a.aggregator_id = su.aggregator_id)) + on (s.source_id = su.source_id) + where + a.package_id = :package_id + and a.aggregator_id = :aggregator_id + [ad_decode source "" "" [template::list::orderby_clause -orderby -name sources]] + + - - + + + select na_source__delete( + :delete_id + ); + + + Index: openacs-4/packages/news-aggregator/www/subscriptions.adp =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/www/subscriptions.adp,v diff -u -r1.3 -r1.4 --- openacs-4/packages/news-aggregator/www/subscriptions.adp 26 Sep 2003 04:05:51 -0000 1.3 +++ openacs-4/packages/news-aggregator/www/subscriptions.adp 20 Mar 2004 11:12:29 -0000 1.4 @@ -1,56 +1,36 @@ + @page_title@ + @context@ -Subscriptions +

+ Enter the URL of an XML news feed you want to subscribe to, then click on the Add button. +

-@context_bar;noquote@ + + + style="border: 1px red dashed"> + + + +
+ Source URL: + + +

+ Congratulations, you have been subscribed to @new_source_title@. +

+
+
+
-

+ +

+ The following table lists the XML news feeds you've subscribed to. + To delete a subscription, check it and then click on the Unsubscribe button + at the bottom of the page. +

+ - - - - -
- - Enter the URL of an XML news feed you want to subscribe to in the box below, - then click on the Add button. - - - URL: - - - - - The following table lists the XML news feeds you've subscribed to. Included is the name of the - source, linked to its Web page, the time or day it last changed, the number of times it has - changed since you subscribed, and a link to the XML file for the channel. To delete a subscription, - check it and then click on the Unsubscribe button at the bottom of the page. - - - - -
- - - - -
- - - - - - - - - - -
@sources.title@@sources.last_scanned@@sources.updates@RSS
-
-
- - - - -
-
+

+ +

Index: openacs-4/packages/news-aggregator/www/subscriptions.tcl =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/www/subscriptions.tcl,v diff -u -r1.2 -r1.3 --- openacs-4/packages/news-aggregator/www/subscriptions.tcl 28 Aug 2003 09:41:57 -0000 1.2 +++ openacs-4/packages/news-aggregator/www/subscriptions.tcl 20 Mar 2004 11:12:29 -0000 1.3 @@ -1,59 +1,155 @@ -#packages/news-aggregator/subscriptions.tcl - ad_page_contract { The News Aggregator subscription page. @author Simon Carstensen (simon@bcuni.net) @creation-date Jan 2003 } { - source_id:integer,notnull,optional,multiple + aggregator_id:integer + new_source_id:integer,optional + {source_id:integer,multiple ""} {feed_url ""} -} -properties { - context_bar:onevalue + {orderby ""} } -ad_maybe_redirect_for_registration - permission::require_permission \ - -object_id [ad_conn package_id] \ + -object_id $aggregator_id \ -privilege write -set user_id [ad_conn user_id] +set page_title "Subscriptions" +set context [list $page_title] -set context_bar [ad_context_bar "Add Subscription"] - +set user_id [ad_conn user_id] set package_id [ad_conn package_id] +set package_url [ad_conn package_url] -ad_form -name add_subscription -form { +set opml_url "${package_url}opml/$aggregator_id/mySubscriptions.opml" - new_source_id:key +# This is done in case we want to implement some user interface +# stuff in the future where it might be useful. +if { [empty_string_p $feed_url] } { + set feed_url_val "" +} else { + set feed_url_val $feed_url +} + + #ad_returnredirect "[ad_conn package_url]opml/$aggregator_id/mySubscriptions.opml" + #ad_script_abort - {feed_url:text(text) {value "http://"} {label "URL:"} {html { size 55 }}} - {add_submit:text(submit) {label "Add"}} -} -validate { - {feed_url - {[exists_and_not_null feed_url] && ![string equal "http://" $feed_url]} "You must specify a URL." +if { [exists_and_not_null source_id] } { + foreach delete_id $source_id { + news_aggregator::subscription::delete \ + -source_id $delete_id \ + -aggregator_id $aggregator_id } -} -new_data { - na_add_source $feed_url $user_id $package_id :key - ad_returnredirect "subscriptions" - ad_script_abort } -db_multirow sources sources { *SQL* } +set aggregator_count [db_string count_aggregators {}] -ad_form -name delete_subscription -form { +set bulk_actions { + Unsubscribe subscriptions Unsubscribe +} - {delete_submit:text(submit) {label "Unsubscribe" } } -} -on_submit { - if {[exists_and_not_null source_id]} { - foreach delete_id $source_id { - db_exec_plsql delete_source { *SQL* } +if { $aggregator_count > 1 } { + # user has more than 1 aggregator, let's present our fancy move and copy features + if { $aggregator_count > 2 } { + set title "another aggregator" + } else { + set title [db_string select_name {}] + } + lappend bulk_actions \ + Copy subscription-copy "Copy selected subscriptions to $title" \ + Move subscription-move "Move selected subscriptions to $title" + +} + +list::create \ + -name sources \ + -multirow sources \ + -key source_id \ + -row_pretty_plural "subscriptions" \ + -actions { + "Export Subscriptions" "opml" "Export your subscriptions as an OPML file" + "Import Subscriptions" "opml-import" "Import your subscriptions from an OPML file" + } -bulk_actions $bulk_actions -elements { + title { + label "Name" + link_url_eval $link + } + last_scanned { + label "Last Scan" + } + last_modified { + label "Last Update" } + updates { + label "Updates" + html {align center} + } + feed_url { + label "Source" + display_template { + View the XML source for this subscription + } + } + } -orderby { + default_value title,asc + title { + label "Name" + orderby_asc "lower(title) asc" + orderby_desc "lower(title) desc" + } + last_scanned { + label "Last Updated" + orderby_desc "last_scanned desc" + orderby_asc "last_scanned asc" + } + last_modified { + label "Last Update" + orderby_desc "last_modified_stamp desc" + orderby_asc "last_modified_stamp asc" + } + updates { + label "Updates" + orderby_asc "updates asc" + orderby_desc "updates desc" + } } - ad_returnredirect "subscriptions" - ad_script_abort +set package_url [ad_conn package_url] + +db_multirow -extend {xml_graphics_url} sources sources {} { + if { [exists_and_not_null new_source_id] && $source_id == $new_source_id } { + set new_source_title $title + } + set xml_graphics_url "${package_url}graphics/xml.gif" } -ad_return_template +ad_form -name add_subscription -form { + {subscription_id:integer(hidden),key} + {feed_url:text(text) + {value $feed_url_val} + {label "URL:"} + {html {size 55}} + } + {add_submit:text(submit) + {label "Add"} + } +} -validate { + {feed_url + { [exists_and_not_null feed_url] && ![string equal "http://" $feed_url] } + { You must specify a URL } + } +} -new_data { + set new_source_id [news_aggregator::source::new \ + -feed_url $feed_url \ + -aggregator_id $aggregator_id \ + -user_id $user_id \ + -package_id $package_id] + + + ad_returnredirect [export_vars -base subscriptions {new_source_id}] + ad_script_abort +} Index: openacs-4/packages/news-aggregator/www/subscriptions.xql =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/www/subscriptions.xql,v diff -u -r1.2 -r1.3 --- openacs-4/packages/news-aggregator/www/subscriptions.xql 28 Aug 2003 09:41:57 -0000 1.2 +++ openacs-4/packages/news-aggregator/www/subscriptions.xql 20 Mar 2004 11:12:29 -0000 1.3 @@ -2,19 +2,25 @@ - - - select title, - source_id, - feed_url, - link, - description, - updates, - to_char(last_scanned, 'YYYY-MM-DD HH24:MI:SS') as last_scanned - from na_sources - where owner_id = :user_id - order by lower(title) - + + + select count(*) + from na_aggregators a, + acs_objects o + where a.aggregator_id = o.object_id + and o.creation_user = :user_id + + + + select a.aggregator_name + from na_aggregators a, + acs_objects o + where o.object_id = a.aggregator_id + and a.aggregator_id != :aggregator_id + and o.creation_user = :user_id + + + Fisheye: Tag 1.3 refers to a dead (removed) revision in file `openacs-4/packages/news-aggregator/www/update.tcl'. Fisheye: No comparison available. Pass `N' to diff?