Index: openacs-4/packages/acs-content-repository/tcl/content-revision-procs.tcl =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/acs-content-repository/tcl/content-revision-procs.tcl,v diff -u -r1.36 -r1.37 --- openacs-4/packages/acs-content-repository/tcl/content-revision-procs.tcl 29 Nov 2018 23:56:23 -0000 1.36 +++ openacs-4/packages/acs-content-repository/tcl/content-revision-procs.tcl 3 Sep 2024 15:37:31 -0000 1.37 @@ -1,6 +1,6 @@ ad_library { - Procedures to manipulate content revisions + CRUD procedures for content revisions @author Dave Bauer (dave@thedesignexperience.org) @creation-date 2004-06-04 @@ -32,44 +32,27 @@ passed in, we determine it from the content item. This is needed to find the attributes for the content type. - @author Dave Bauer (dave@thedesignexperience.org) @creation-date 2004-06-04 @param revision_id - @param item_id - @param content_type - @param title - @param description - @param content - @param mime_type - @param publish_date - @param nls_language - @param creation_date - @param creation_user - @param creation_ip - @param package_id Package_id content belongs to - @param is_live True is revision should be set live - - @param tmp_filename file containing content to be added to revision. Caller is responsible to handle cleaning up the tmp file - + @param tmp_filename file containing content to be added to revision. + The caller is responsible for cleaning up the temporary file. @param package_id - @param is_live - @param attributes A list of lists of pairs of additional attributes and their values to pass to the constructor. Each pair is a list of two elements: key => value such as @@ -133,9 +116,12 @@ -mime_type $mime_type \ -file $tmp_filename] - set query_text "insert into ${table_name}i - (revision_id, object_type, creation_user, creation_date, creation_ip, title, description, item_id, object_package_id, mime_type $attribute_names) - values (:revision_id, :content_type, :creation_user, :creation_date, :creation_ip, :title, :description, :item_id, :package_id, :mime_type $attribute_values)" + set query_text [subst { + insert into ${table_name}i + (revision_id, object_type, creation_user, creation_date, creation_ip, title, description, item_id, object_package_id, mime_type $attribute_names) + values (:revision_id, :content_type, :creation_user, :creation_date, :creation_ip, :title, :description, :item_id, :package_id, :mime_type $attribute_values) + }] + db_transaction { # An explicit lock was necessary for PostgreSQL between 8.0 and # 8.2; left the following statement here for documentary purposes @@ -148,6 +134,7 @@ # the postgres "insert into view" is rewritten by the rule into a "select" [expr {[db_driverkey ""] eq "postgresql" ? "db_0or1row" : "db_dml"}] \ insert_revision $query_text + ::content::revision::update_content \ -item_id $item_id \ -revision_id $revision_id \ @@ -162,16 +149,294 @@ return $revision_id } -ad_proc -public ::content::revision::update_content { - -item_id - -revision_id - -content - -storage_type - -mime_type - {-tmp_filename ""} +# +# ::content::revision::collect_cleanup_data +# +ad_proc -private ::content::revision::collect_cleanup_data { + -item_id:required + -storage_type:required } { + return [::content::revision::collect_cleanup_data-$storage_type -item_id $item_id] +} +ad_proc -private ::content::revision::collect_cleanup_data-text { + -item_id:required +} { + return +} + +ad_proc -private ::content::revision::collect_cleanup_data-lob { + -item_id:required +} { + return +} + +ad_proc -private ::content::revision::collect_cleanup_data-file { + -item_id:required +} { + return [db_list get_files {select content from cr_revisions where item_id = :item_id}] +} + +# +# ::content::revision::cleanup +# +ad_proc -private ::content::revision::cleanup { + -storage_type:required + -storage_area_key:required + -data:required +} { + return [::content::revision::cleanup-$storage_type \ + -storage_area_key $storage_area_key \ + -data $data] +} + +ad_proc -private ::content::revision::cleanup-text { + -storage_area_key:required + -data:required +} { + return +} + +ad_proc -private ::content::revision::cleanup-lob { + -storage_area_key:required + -data:required +} { + return +} + +ad_proc -private ::content::revision::cleanup-file { + -storage_area_key:required + -data:required +} { + + This function cleans-up files AFTER the DB-entry was deleted. If + the transaction is aborted, the file will not be executed and the + file will survive. Thus function should make + cr_check_orphaned_files obsolete, which does not scale. + + @see cr_check_orphaned_files +} { + set dir [cr_fs_path $storage_area_key] + foreach filename $data { + #ns_log notice "::content::revision::cleanup-file: DELETE FILE $dir$filename" + file delete $dir$filename + } +} + + +ad_proc -private ::content::revision::check_files { + {-max_results 5000} + {-max_checks 10000} + {-returnlist:boolean} +} { + Figure out, how many files in the CR are not linked to the + revisions in the content repository, and report them + optionally. + + @author Gustaf Neumann + + @param max_results stop after having found so many non-referenced files + @param max_checks stop after having checked so many non-referenced files + @param returnlist return the non-referenced files as part of the result +} { + set paths [cr_fs_path CR_FILES] + set prefix_length [string length $paths] + set count 1 + set missing 0 + set files {} + while {[llength $paths] > 0} { + # get the first path + set paths [lassign $paths path] + #ns_log notice "popping path '$path' form paths, remaining [llength $paths]" + + set children [glob -nocomplain -directory $path *] + foreach child $children { + if {[file tail $child] in {. ..}} { + continue + } + if {[file isdirectory $child]} { + # + # Using "lappend" leads to a breadth-search: might be + # slow when the directories a huge, since it takes a + # while until leaves are found. + # + #lappend paths $child + + set paths [lreplace $paths -1 -2 $child] + #ns_log notice "child is dir $child" + } else { + set suffix [string range $child $prefix_length end] + set success [cr_count_file_entries $suffix] + if {$success == 0} { + ns_log notice "check_files: $count file $child not in db entries" + incr missing + lappend files $child + } + incr count + if {$count >= $max_checks || $missing >= $max_results} break + + } + } + if {$count >= $max_checks || $missing >= $max_results} break + } + set msg "$missing of $count files are not ok (not contained in db entries)" + if {$returnlist_p} { + append msg \n [join $files \n] + } + return $msg +} + +ad_proc -private ::content::revision::check_dirs { + {-max_results 5000} + {-max_checks 10000} + {-returnlist:boolean} + {-prune:boolean} +} { + Figure out, how many directories in the CR are empty, report them + optionally or delete them optionally. + + @author Gustaf Neumann + + @param max_results stop after having found so many empty directories + @param max_checks stop after having checked so many directories + @param prune delete the found empty directories + @param returnlist return the directories as part of the result +} { + set paths [cr_fs_path CR_FILES] + set prefix_length [string length $paths] + set count 1 + set empty_dirs 0 + set dirs 0 + set empty_dir_list {} + while {[llength $paths] > 0} { + # get the first path + set paths [lassign $paths path] + #ns_log notice "popping path '$path' form paths, remaining [llength $paths]" + + set children [glob -nocomplain -directory $path *] + set nr_children 0 + incr dirs + foreach child $children { + if {[file tail $child] in {. ..}} { + continue + } + if {[file isdirectory $child]} { + # + # Using "lappend" leads to a breadth-search: might be + # slow when the directories a huge, since it takes a + # while until leaves are found. + # + #lappend paths $child + + set paths [lreplace $paths -1 -2 $child] + #ns_log notice "child is dir $child" + } + incr nr_children + } + if {$nr_children == 0} { + incr empty_dirs + ns_log notice "check_dirs: directory $path is empty ($empty_dirs out of $dirs)" + lappend empty_dir_list $path + if {$prune_p && [regexp {^\d+$} [file tail $path]]} { + file delete $path + } + } + if {$empty_dirs >= $max_results || $dirs >= $max_checks} { + break + } + } + set msg "$empty_dirs out of $dirs directories are empty" + ns_log notice "check_dirs: $msg" + if {$returnlist_p} { + append msg \n [join $empty_dir_list \n] + } + return $msg +} + +ad_proc -private ::content::revision::file_stats { + {-max 10000} +} { + + Determine some basic statistics about files in the CR based on a + sample. This is useful for large installations with several + million of files, where a detailed analysis would take very long. + + @author Gustaf Neumann + + @param max number of revisions with storage-type "file" to check + @result some statistics +} { + set tuples [db_list_of_lists get_file_names { + select i.item_id, revision_id, mime_type, content_length + from cr_items i, cr_revisions r + where storage_type = 'file' + and storage_area_key = 'CR_FILES' + and r.item_id = i.item_id + FETCH FIRST :max ROWS ONLY + }] + set count 0 + set total_length 0 + set empty_files 0 + foreach tuple $tuples { + lassign $tuple item_id revision_id mime_type content_length + incr count + if {$content_length eq ""} { + ns_log warning "file_stats: entry has no content_length: revision_id $revision_id mime_type $mime_type" + } else { + incr total_length $content_length + } + incr mime_types($mime_type) + incr revisions_for_item($item_id) + if {$content_length < 1} { + incr empty_files + } + } + set result "" + if {$count > 0} { + set backup_files 0 + set files_with_multiple_revisions 0 + foreach {item_id revs} [array get revisions_for_item] { + if {$revs > 1} { + incr files_with_multiple_revisions + incr backup_files [expr {$revs - 1}] + } + } + set most_common [lrange [lsort \ + -integer \ + -stride 2 \ + -index 1 \ + -decreasing \ + [array get mime_types] + ] 0 11] + + append result \ + "checked files : $count\n" \ + "files with multiple revisions: $files_with_multiple_revisions\n" \ + "backup files : $backup_files\n" \ + "empty files : $empty_files\n" \ + "avg file size : [format %10.2f [expr {$total_length*1.0/$count}]]\n" \ + "mime_types: $most_common" + ns_log notice "file_stats: $result" + } + return $result +} + + + + +# +# ::content::revision::update_content +# +ad_proc -private ::content::revision::update_content { + -item_id:required + -revision_id:required + -content:required + -storage_type:required + -mime_type:required + {-tmp_filename ""} +} { + Update content column separately. Oracle does not allow insert into a BLOB. @@ -187,43 +452,80 @@ @param content Content to add to resivsion @param storage_type text, file, or lob @param mime_type mime type of the content - @param tmp_filename For file storage type a filename can be specified. It will be added to the contnet repository. Caller is responsible to handle cleaning up the tmp file + @param tmp_filename For storage types except 'text' + a filename can be specified + instead of 'content'. The caller is responsible + for cleaning up the temporary file +} { + #ns_log notice "::content::revision::update_content" \ + "update_content-$storage_type $revision_id" \ + "content '$content' mime_type $mime_type tmp_filename '$tmp_filename'" - @return + ::content::revision::update_content-$storage_type \ + -item_id $item_id \ + -revision_id $revision_id \ + -content $content \ + -mime_type $mime_type \ + -tmp_filename $tmp_filename +} - @error +ad_proc -private ::content::revision::update_content-text { + -item_id:required + -revision_id:required + -content:required + -mime_type:required + {-tmp_filename ""} } { + db_dml update_content "" -blobs [list $content] - switch -- $storage_type { - file { - if {$tmp_filename eq ""} { - set filename [cr_create_content_file_from_string $item_id $revision_id $content] - } else { - set filename [cr_create_content_file $item_id $revision_id $tmp_filename] - } - set tmp_size [file size [cr_fs_path]$filename] - db_dml set_file_content "" - } - lob { - if {$tmp_filename ne ""} { - # handle file - set filename [cr_create_content_file $item_id $revision_id $tmp_filename] - db_dml set_lob_content "" -blob_files [list $tmp_filename] - db_dml set_lob_size "" - } else { - # handle blob - db_dml update_content "" -blobs [list $content] - } - } - default { - # HAM : 112505 - # I added a default switch because in some cases - # storage type is text and revision is not being updated - db_dml update_content "" -blobs [list $content] - } + if {$tmp_filename ne ""} { + # Traditionally, a provided tmp_file is not handled. I + # could/should be probably supported in the future. + ns_log warning "::content::revision::update_content-text: provided tmp_filename is ignored" } } +ad_proc -private ::content::revision::update_content-file { + -item_id:required + -revision_id:required + -content:required + -mime_type:required + {-tmp_filename ""} +} { + if {$tmp_filename eq ""} { + set filename [cr_create_content_file_from_string $item_id $revision_id $content] + } else { + set filename [cr_create_content_file $item_id $revision_id $tmp_filename] + } + set tmp_size [file size [cr_fs_path]$filename] + db_dml set_file_content { + update cr_revisions + set content = :filename, + mime_type = :mime_type, + content_length = :tmp_size + where revision_id = :revision_id + } +} + +ad_proc -private ::content::revision::update_content-lob { + -item_id:required + -revision_id:required + -content:required + -mime_type:required + {-tmp_filename ""} +} { + if {$tmp_filename ne ""} { + # handle file + set filename [cr_create_content_file $item_id $revision_id $tmp_filename] + db_dml set_content "" -blob_files [list $tmp_filename] + db_dml set_size "" + } else { + # handle blob + db_dml update_content "" -blobs [list $content] + } +} + + ad_proc -public content::revision::content_copy { -revision_id:required {-revision_id_dest ""} @@ -365,7 +667,11 @@ @return The item_id of the item to which this revision belongs } { - return [db_string item_id {} -default ""] + return [db_string item_id { + select item_id + from cr_revisions + where revision_id = :revision_id + } -default ""] } @@ -417,7 +723,23 @@ ] content_revision revision_name] } +ad_proc -public content::revision::get_title { + -revision_id:required +} { + Returns the title of a particular 'content_revision'. + + @param revision_id The 'revision_id' of the object + + @see content::item::get_title + @see content::revision::revision_name + + @return The title of the object (text), or empty if not found. + +} { + return [db_string get_title {select title from cr_revisions where revision_id = :revision_id} -default ""] +} + ad_proc -public content::revision::to_html { -revision_id:required } { @@ -456,9 +778,11 @@ } -ad_proc -public content::revision::update_attribute_index { +ad_proc -deprecated content::revision::update_attribute_index {} { + DEPRECATED: the db api for this proc was deleted in 2005 + + @see https://github.com/openacs/openacs-core/commit/1cf48b17dd5faa0a2cbd988ab28d3127d3e25c61#diff-f580056c1afc98a3c8bda629878b7ea8 } { -} { return [package_exec_plsql content_revision update_attribute_index] } @@ -477,11 +801,89 @@ # the file path is stored in filename column on oracle # and content in PostgreSQL, but we alias to filename so it makes # sense - db_1row get_storage_key_and_path "" + db_1row get_storage_key_and_path {} return [cr_fs_path $storage_area_key]${filename} } +# +# ::content::revision::export_to_filesystem +# +# This function was previously part of +# fs::publish_versioned_object_to_file_system but the application +# packages should be fully agnostic to the storage_type +# implementation. +ad_proc ::content::revision::export_to_filesystem { + -revision_id:required + -storage_type:required + -filename:required +} { + Export the content of the provided revision to the named file in + the filesystem. +} { + ::content::revision::export_to_filesystem-$storage_type \ + -revision_id $revision_id \ + -filename $filename +} + +ad_proc -private ::content::revision::export_to_filesystem-text { + -revision_id:required + -filename:required +} { + Export the content of the provided revision to the named file in + the filesystem. +} { + set content [db_string select_object_content { + select content from cr_revisions where revision_id = :revision_id + }] + set fp [open $filename w] + puts $fp $content + close $fp +} + +ad_proc -private ::content::revision::export_to_filesystem-file { + -revision_id:required + -filename:required +} { + Export the content of the provided revision to the named file in + the filesystem. +} { + set cr_file_name [content::revision::get_cr_file_path -revision_id $revision_id] + + # + # Check if cr_file_name is not empty, otherwise we could end up copying the + # whole content-repository. + # + if {$cr_file_name ne ""} { + # + # When there are multiple "unnamed files" in a directory, the + # constructed filename might exist already. This would lead to an + # error in the "file copy" operation. Therefore, generate a new + # name with an alternate suffix in these cases. + # + set base_name $filename + set count 0 + while {[ad_file exists $filename]} { + set filename $base_name-[incr $count] + } + + file copy -- $cr_file_name $filename + } else { + ad_log Warning "::content::revision::export_to_filesystem-file: \ + cr_file_name is empty (revision_id: $revision_id)" + } +} + +ad_proc -private ::content::revision::export_to_filesystem-lob { + -revision_id:required + -filename:required +} { + Export the content of the provided revision to the named file in + the filesystem. +} { + db_blob_get_file select_object_content {} -file $filename +} + # Local variables: # mode: tcl # tcl-indent-level: 4