Index: openacs-4/packages/news-aggregator/tcl/source-procs.tcl =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/news-aggregator/tcl/source-procs.tcl,v diff -u -r1.18 -r1.19 --- openacs-4/packages/news-aggregator/tcl/source-procs.tcl 2 Apr 2019 09:40:30 -0000 1.18 +++ openacs-4/packages/news-aggregator/tcl/source-procs.tcl 2 Apr 2019 09:53:53 -0000 1.19 @@ -147,38 +147,14 @@ return } - # First we assemble a list of arrays of: - # feed_url, guid - # Then we fetch the guids that we want to deal with - # Finally, we insert those. - set guids [list] + set source_updated_p false foreach item $items { - lappend guids [news_aggregator::source::generate_guid \ - -link [dict get $item link] \ - -feed_url $feed_url \ - -title [dict get $item title] \ - -description [dict get $item description] \ - -guid [dict get $item guid]] - } - - set existing_items [dict create] - set guids '[join $guids "', '"]' - db_foreach items [subst { - select guid, i.title, i.description - from na_items i join - na_sources s on (i.source_id = s.source_id) - where s.feed_url = :feed_url - and guid in ($guids) - }] { - dict set existing_items $guid [list $title $description] - ns_log Debug "source::update: existing guid $guid\n\ttitle = $title" - } - - ns_log Debug "source::update: existing_guids = [dict keys $existing_items]" - - set no_items 0 - set updated_p false - foreach item $items { + set guid [news_aggregator::source::generate_guid \ + -link [dict get $item link] \ + -feed_url $feed_url \ + -title [dict get $item title] \ + -description [dict get $item description] \ + -guid [dict get $item guid]] set title [string_truncate -len 500 -- [dict get $item title]] set link [string_truncate -len 500 -- [dict get $item link]] set original_guid [string_truncate -len 500 -- [dict get $item guid]] @@ -191,44 +167,46 @@ set pub_date [clock format $pub_date -format "%Y-%m-%d %T UTC"] } - set guid [news_aggregator::source::generate_guid \ - -link [dict get $item link] \ - -feed_url $feed_url \ - -title [dict get $item title] \ - -description [dict get $item description] \ - -guid [dict get $item guid]] - - set new_p [expr {![dict exists $existing_items $guid]}] - if {$new_p} { - ns_log Debug "source::update: guid $guid marked as new" - } else { - lassign [dict get $existing_items $guid] db_title db_description - ns_log Debug "source::update: guid $guid marked as existing\ttitle = $db_title\tdescription = $db_description" - } - - if { $new_p || $db_title ne $title || $db_description ne $description } { - set updated_p true - ns_log Debug "source::update: guid $guid marked as existing but updated; title=!$title! description=!$description!" - if { !$new_p } { - ns_log Debug "source::update:\tdb_title=!$db_title! db_description=!$db_description!" - ns_log Debug "source::update:\tfirst_equal=[string equal $db_title $title] second_equal=[string equal $db_description $description] new_p=$new_p item_title=[string length $title] chars db_title=[string length $db_title] chars" + # Notice: we are assuming source_id,guid is a superkey of + # na_items. This is not technically true, but this upgrade + # strategy should enforce it. + set item_exists_p [db_0or1row get_existing_news_item { + select title as existing_title, description as existing_description, link as existing_link + from na_items + where source_id = :source_id + and guid = :guid + }] + if {$item_exists_p} { + set item_updated_p [expr {$title ne $existing_title || + $description ne $existing_description || + $link ne $existing_link}] + if {$item_updated_p} { + db_dml update_news_item { + update na_items set + title = :title, + description = :description, + link = :link + where source_id = :source_id + and guid = :guid + } + ns_log Debug "source::update: news item $source_id.$guid updated" } else { - ns_log Debug "source::update: guid $guid is new and will be inserted; title=$title description=$description" + ns_log Debug "source::update: news item $source_id.$guid found and skipped" } - - db_exec_plsql add_item {} - incr no_items } else { - ns_log Debug "source::update: guid $guid marked as existing and not updated" + db_exec_plsql add_item {} + ns_log Debug "source::update: news item $source_id.$guid created" } + + set source_updated_p [expr {$source_updated_p || !$item_exists_p || $item_updated_p}] } set channel [dict get $result channel] set link [dict get $channel link] set title [dict get $channel title] set description [dict get $channel description] - if { $updated_p } { + if { $source_updated_p } { db_dml update_source { update na_sources set link = :link, @@ -240,6 +218,7 @@ last_modified_stamp = current_timestamp where source_id = :source_id } + ns_log Debug "source::update: news source $source_id updated" } else { db_dml update_source_no_new { update na_sources set @@ -249,6 +228,7 @@ description = :description where source_id = :source_id } + ns_log Debug "source::update: news source $source_id not updated" } }