Index: openacs-4/packages/acs-kernel/acs-kernel.info
===================================================================
RCS file: /usr/local/cvsroot/openacs-4/packages/acs-kernel/acs-kernel.info,v
diff -u -r1.150.2.51 -r1.150.2.52
--- openacs-4/packages/acs-kernel/acs-kernel.info	7 Feb 2023 11:47:40 -0000	1.150.2.51
+++ openacs-4/packages/acs-kernel/acs-kernel.info	7 Feb 2023 17:50:32 -0000	1.150.2.52
@@ -9,15 +9,15 @@
 f t - + OpenACS Core Team Routines and data models providing the foundation for OpenACS-based Web services. 2021-09-15 OpenACS The OpenACS kernel contains the core datamodel create and drop scripts for such things as objects, groups, parties and the supporting PL/SQL and PL/pgSQL procedures. 3 - +
@@ -30,18 +30,19 @@
 - + - + + - +
Index: openacs-4/packages/acs-tcl/acs-tcl.info
===================================================================
RCS file: /usr/local/cvsroot/openacs-4/packages/acs-tcl/acs-tcl.info,v
diff -u -r1.95.2.56 -r1.95.2.57
--- openacs-4/packages/acs-tcl/acs-tcl.info	7 Feb 2023 11:47:40 -0000	1.95.2.56
+++ openacs-4/packages/acs-tcl/acs-tcl.info	7 Feb 2023 17:50:31 -0000	1.95.2.57
@@ -9,7 +9,7 @@
 f t - + OpenACS The Kernel Tcl API library. 2021-09-15
@@ -18,9 +18,9 @@
 GPL version 2 3 - + - +
Index: openacs-4/packages/acs-tcl/tcl/cluster-init.tcl
===================================================================
RCS file: /usr/local/cvsroot/openacs-4/packages/acs-tcl/tcl/cluster-init.tcl,v
diff -u -r1.1.2.8 -r1.1.2.9
--- openacs-4/packages/acs-tcl/tcl/cluster-init.tcl	29 Dec 2022 13:02:48 -0000	1.1.2.8
+++ openacs-4/packages/acs-tcl/tcl/cluster-init.tcl	7 Feb 2023 17:50:32 -0000	1.1.2.9
@@ -1,32 +1,40 @@
 #
 # Check if cluster is enabled, and if so, set up the cluster objects
 #
+ns_log notice "server_cluster_enabled_p: [server_cluster_enabled_p]"
 if {[server_cluster_enabled_p]} {
     #
-    # Register the nodes, which are available at startup time.
+    # Check, whether the secret for intra-cluster communication is
+    # properly defined. If not, do not activate cluster mode.
     #
-    ::acs::Cluster register_nodes
+    if {![::acs::cluster secret_configured]} {
+        ns_log error "cluster setup aborted:" \
+            "the cluster secret is not properly defined." \
+            "Deactivated cluster mode."
+        proc server_cluster_enabled_p {} { return 0 }
+        return
+    }
+
     #
-    # Update the blueprint every 60s in case the cluster configuration
-    # has changed, or cluster nodes become available or unavailable.
+    # Perform setup only once (not in every object creation in new
+    # threads).
     #
-    ad_schedule_proc -all_servers t 20 ::acs::Cluster refresh_blueprint
+    ns_log notice "performing cluster setup"
+    ::acs::cluster setup
 
-    foreach ip [parameter::get -package_id $::acs::kernel_id -parameter ClusterAuthorizedIP] {
-        if {[string first * $ip] > -1} {
-            ::acs::Cluster eval [subst {
-                lappend :allowed_host_patterns $ip
-            }]
-        } else {
-            ::acs::Cluster eval [subst {
-                set :allowed_host($ip) 1
-            }]
-        }
-    }
+    #
+    # Update the cluster info every 20s to detect changed cluster
+    # configurations, or cluster nodes becoming available or
+    # unavailable.
+    #
+    ad_schedule_proc -all_servers t 20s ::acs::cluster update_node_info
 
-    set url [::acs::Cluster eval {set :url}]
+    #
+    # Setup of the listening URL
+    #
+    set url [::acs::cluster cget -url]
 
     # Check, if the filter URL mirrors a site node. If so,
     # the cluster mechanism will not work, if the site node
@@ -35,23 +43,33 @@
     set node_info [site_node::get -url $url]
     if {[dict get $node_info url] ne "/"} {
-        ns_log notice "***\n*** WARNING: there appears a package mounted on" \
+        ns_log warning "***\n*** WARNING: there appears to be a package mounted on" \
             "$url\n***Cluster configuration will not work" \
             "since there is a conflict with the filter with the same name! (n)"
-    }
+    } else {
 
-    #ns_register_filter trace GET $url ::acs::Cluster
-    ns_register_filter preauth GET $url ::acs::Cluster
-    #ns_register_filter postauth GET $url ::acs::Cluster
-    #ad_register_filter -priority 900 preauth GET $url ::acs::Cluster
+        #ns_register_filter trace GET $url ::acs::cluster
+        ns_register_filter preauth GET $url ::acs::cluster
+        #ns_register_filter postauth GET $url ::acs::cluster
+        #ad_register_filter -priority 900 preauth GET $url ::acs::cluster
 
-    ns_register_proc GET $url ::acs::Cluster incoming_request
+        ns_register_proc GET $url ::acs::cluster incoming_request
+    }
 
+    #
+    # Register the nodes, which are reachable at startup time.
+    #
+    ::acs::cluster register_nodes -startup
+
     ns_atstartup {
+        #
+        # We could add some code for actively testing the keep-alive
+        # status.
+        #
         ns_log notice "CHECK ::throttle '[::info commands ::throttle]'"
         if {0 && [::info commands ::throttle] ne ""} {
-            ns_log notice "CHECK calling ::acs::Cluster check_nodes"
-            throttle do ::acs::Cluster check_nodes
+            ns_log notice "CHECK calling ::acs::cluster check_nodes"
+            throttle do ::acs::cluster check_nodes
         }
     }
 }
Index: openacs-4/packages/acs-tcl/tcl/cluster-procs.tcl
===================================================================
RCS file: /usr/local/cvsroot/openacs-4/packages/acs-tcl/tcl/cluster-procs.tcl,v
diff -u -r1.1.2.4 -r1.1.2.5
--- openacs-4/packages/acs-tcl/tcl/cluster-procs.tcl	29 Dec 2022 14:17:33 -0000	1.1.2.4
+++ openacs-4/packages/acs-tcl/tcl/cluster-procs.tcl	7 Feb 2023 17:50:32 -0000	1.1.2.5
@@ -1,6 +1,5 @@
+# Copyright (C) 2022-2023 Gustaf Neumann, neumann@wu-wien.ac.at
 #
-# Copyright (C) 2022 Gustaf Neumann, neumann@wu-wien.ac.at
-#
 # Vienna University of Economics and Business
 # Institute of Information Systems and New Media
 # A-1020, Welthandelsplatz 1
@@ -40,11 +39,11 @@
         #
         # Then, distribute the command to all servers in the cluster.
         #
-        ::acs::Cluster broadcast {*}$args
+        ::acs::cluster broadcast {*}$args
         return $result
     }
 
-    proc cache_flush_all {cache pattern} {
+    proc cache_flush_pattern {cache pattern} {
         #
         # Provide means to perform a wildcard-based cache flushing on
         # (cluster) machines.
@@ -54,23 +53,44 @@
         }
     }
 
+    proc cache_flush_all {} {
+        #
+        # Reset all caches and flush all of their contents.
+        #
+        foreach cache [ns_cache_names] {
+            ns_cache flush $cache
+        }
+    }
+
+    #::nsf::method::property nx::Object "object method" debug on
+    #::nsf::method::property nx::Class method debug on
+
     nx::Class create Cluster {
         :property {proto http}
         :property host
         :property {port 80}
         :property {url /acs-cluster-do}
-        :property {chan}
 
-        set :allowed_host_patterns [list]
-        set :url /acs-cluster-do
-        array set :allowed_host { "127.0.0.1" 1 }
+        # set cls [nx::Class create ::acs::ClusterMethodMixin {
+        #     :method "object method" args {
+        #         ns_log notice "[self] define object method $args"
+        #         next
+        #     }
+        #     :method method args {
+        #         ns_log notice "[self] define method $args"
+        #         next
+        #     }
+        # }]
+        # :object mixins add $cls
+
+        :variable allowed_host { "127.0.0.1" 1 }
 
         #
         # The allowed commands are of the form
         #  - command names followed by
         #  - optional "except patterns"
        #
-        set :allowed_command {
+        :variable allowed_command {
            set ""
            unset ""
            nsv_set ""
@@ -84,42 +104,70 @@
            util_memoize_flush_regexp_local ""
            ns_urlspace ""
            acs::cache_flush_all ""
+            acs::cache_flush_pattern ""
+            ::acs::cluster "^::acs::cluster\s+join_request"
        }
 
-        :object method log {args} {
+        #
+        # Control verbosity
+        #
+        :method log {args} {
            ns_log notice "cluster: [join $args { }]"
        }
-        :method log {args} {
-            ns_log notice "cluster host ${:host} ${:port}: [join $args { }]"
+
+        :public method setup {} {
+            #
+            # Set up object-specific variables. Make sure to call
+            # this method when the called procs are available.
+            #
+            set :currentServerLocations [:current_server_locations]
+            set :currentServerLocation [:preferred_location ${:currentServerLocations}]
+
+            set :canonicalServer [parameter::get -package_id $::acs::kernel_id -parameter CanonicalServer]
+            set :canonicalServerLocation [:preferred_location [:qualified_location ${:canonicalServer}]]
+
+            set :current_server_is_canonical_server [:current_server_is_canonical_server]
+            set :staticServerLocations \
+                [lmap entry [parameter::get -package_id $::acs::kernel_id -parameter ClusterPeerIP] {
+                    :preferred_location [:qualified_location $entry]
+                }]
        }
+
+        :method init {} {
+            nsv_set cluster . .
+            next
+        }
 
        #
-        # Handling the ns_filter methods
+        # Handling the ns_filter methods (as defined in cluster-init.tcl)
        #
-        :public object method preauth args {
+        :public method preauth args {
+            #
+            # Process no more pre-authorization filters for this
+            # connection (avoid running of expensive filters).
+            #
            #ns_log notice "PREAUTH returns filter_break"
            return filter_break
        }
-        :public object method postauth args {
-            #ns_log notice "POSTAUTH returns filter_break"
-            return filter_break
-        }
+        # :public method postauth args {
+        #     #ns_log notice "POSTAUTH returns filter_break"
+        #     return filter_break
+        # }
 
-        :public object method trace args {
-            #:log "trace"
-            #ns_log notice "TRACE handles request"
-            #:incoming_request
-            #ns_log notice "TRACE returns filter_return"
-            return filter_return
-        }
+        # :public method trace args {
+        #     #:log "trace"
+        #     #ns_log notice "TRACE handles request"
+        #     #:incoming_request
+        #     #ns_log notice "TRACE returns filter_return"
+        #     return filter_return
+        # }
 
-        :public object method allowed_command {cmd} {
+        :method allowed_command {cmd} {
            #
            # Check, which commands are allowed to be executed in the
            # cluster.
            #
-            #ns_log notice "--cluster allowed [dict keys ${:allowed_command}]?"
            set cmd_name [lindex $cmd 0]
            #ns_log notice "--cluster can I execute $cmd_name? [dict exists ${:allowed_command} $cmd_name]"
@@ -128,8 +176,8 @@
                #ns_log notice "--cluster [list regexp $except_RE $cmd] -> [regexp $except_RE $cmd]"
                set allowed [expr {$except_RE eq "" || ![regexp $except_RE $cmd]}]
            } elseif {[nsf::is object $cmd_name]
-                      && ($cmd_name ::nsf::methods::object::info::hastype acs::Cache
-                          || $cmd_name ::nsf::methods::object::info::hastype acs::LockfreeCache)} {
+                      && ([$cmd_name ::nsf::methods::object::info::hastype acs::Cache]
+                          || [$cmd_name ::nsf::methods::object::info::hastype acs::LockfreeCache])} {
                #
                # Allow operations on cache objects (e.g. needed for)
                #
@@ -142,24 +190,35 @@
        }
 
        #
-        # handle incoming request issues
+        # Handle incoming requests
        #
-        :public object method incoming_request {} {
+        :public method incoming_request {} {
+            #
+            # We received an incoming request from a cluster peer.
+            #
            catch {::throttle do incr ::count(cluster:received)}
-            set cmd [ns_queryget cmd]
-            set addr [lindex [ns_set iget [ns_conn headers] x-forwarded-for] end]
-            set sender [ns_set iget [ns_conn headers] host]
-            nsv_set cluster $sender-update [clock clicks -milliseconds]
-            nsv_incr cluster $sender-count
-            if {$addr eq ""} {set addr [ns_conn peeraddr]}
-            ns_log notice "--cluster got cmd='$cmd' from $addr // sender $sender"
+            ad_try {
                #ns_logctl severity Debug(connchan) on
                #ns_logctl severity Debug(request) on
                #ns_logctl severity Debug(ns:driver) on
                #ns_logctl severity Debug on
+                set r [:message decode]
+                set receive_timestamp [clock clicks -milliseconds]
+                dict with r {
+                    #
+                    # We could check here the provided timestamp and
+                    # honor only recent requests (protection against
+                    # replay attacks). However, the allowed requests
+                    # are non-destructive.
+                    #
+                    nsv_set cluster $peer-last-contact $receive_timestamp
+                    nsv_set cluster $peer-last-request $receive_timestamp
+                    nsv_incr cluster $peer-count
+                    ns_log notice "--cluster got cmd='$cmd' from $peer after [expr {$receive_timestamp - $timestamp}]ms"
 
-            set result [::acs::Cluster execute [ns_conn peeraddr] $cmd]
+                    set result [:execute $r]
+                }
            } on error {errorMsg} {
                ns_log notice "--cluster error: $errorMsg"
                ns_return 417 text/plain $errorMsg
@@ -170,31 +229,31 @@
        }
 
        #
-        # Handling incoming requests from host
+        # Handling incoming requests from peeraddr
        #
-        :public object method execute {host cmd} {
-            if {![info exists :allowed_host($host)]} {
-                set ok 0
-                foreach g ${:allowed_host_patterns} {
-                    if {[string match $g $host]} {
-                        set ok 1
-                        break
-                    }
+        :method execute {messageDict} {
+            #:log execute $messageDict
+            dict with messageDict {
+                if {$peer ni [nsv_get cluster cluster_peer_nodes]} {
+                    ns_log notice ":execute: {$peer ni [nsv_get cluster cluster_peer_nodes]} // cmd $cmd"
+                    set ok [dict exists ${:allowed_host} $peeraddr]
+                } else {
+                    set ok 1
                }
                if {!$ok} {
-                    error "refuse to execute commands from $host (command: '$cmd')"
+                    ns_log notice "could refuse to execute commands from $peeraddr (command: '$cmd') allowed [dict keys ${:allowed_host}]"
                }
+                if {[:allowed_command $cmd]} {
+                    ns_log notice "--cluster executes command '$cmd' from peeraddr $peeraddr port [ns_conn peerport]"
+                    return [{*}$cmd]
+                }
+                error "command '$cmd' from peeraddr $peeraddr not allowed"
            }
-            if {[::acs::Cluster allowed_command $cmd]} {
-                ns_log notice "--cluster executes command '$cmd' from host $host port [ns_conn peerport]"
-                return [eval $cmd]
-            }
-            error "command '$cmd' from host $host not allowed"
        }
 
-        :public object method broadcast args {
+        :public method broadcast args {
            #
-            # Send requests to all cluster nodes.
+            # Send requests to all cluster peers.
            #
            if {[ns_ictl epoch] > 0} {
                catch {::throttle do incr ::count(cluster:broadcast)}
@@ -205,7 +264,11 @@
            # caching in place.
            #
            if {[ns_config "ns/parameters" cachingmode "per-node"] eq "none"
-                && [lindex $args 0] in {acs::cache_flush_all ns_cache}} {
+                && [lindex $args 0] in {
+                    acs::cache_flush_pattern
+                    acs::cache_flush_all
+                    ns_cache}
+            } {
                #
                # If caching mode is none, it is expected that all
                # nodes have this parameter set. Therefore, there is no
@@ -214,277 +277,608 @@
                return
            }
 
-            if {[ns_ictl epoch] > 0} {
-                foreach server [:info instances] {
-                    catch {::throttle do incr ::count(cluster:sent)}
-                    set t0 [clock clicks -microseconds]
-                    $server message {*}$args
-                    set ms [expr {([clock clicks -microseconds] - $t0)/1000}]
-                    catch {::throttle do incr ::agg_time(cluster:sent) $ms}
+            if {[nsv_get cluster cluster_peer_nodes locations]} {
+                #
+                # During startup the throttle thread might not be
+                # started, so omit these statistics values.
+                #
+                if {[ns_ictl epoch] > 0} {
+                    foreach location $locations {
+                        catch {::throttle do incr ::count(cluster:sent)}
+                        set t0 [clock clicks -microseconds]
+                        :send $location {*}$args
+                        set ms [expr {([clock clicks -microseconds] - $t0)/1000}]
+                        catch {::throttle do incr ::agg_time(cluster:sent) $ms}
+                    }
+                } else {
+                    foreach location $locations {
+                        :send $location {*}$args
+                    }
                }
-            } else {
-                foreach server [:info instances] {
-                    $server message {*}$args
-                }
            }
        }
 
-        :public object method refresh_blueprint {} {
+        :public method update_node_info {} {
            #
-            # Update the blueprint in case the nodes have
-            # changed. This might happen, when the configuration
-            # variables changed, or when nodes become
+            # Update the cluster configuration when the
+            # configuration variables changed, or when nodes become
            # available/unavailable after some time.
            #
-            set oldConfig [::acs::Cluster info instances]
-            :register_nodes
-            set newConfig [::acs::Cluster info instances]
-            if {$newConfig ne $oldConfig} {
-                set code ""
-                foreach obj $newConfig {
-                    append code [$obj serialize] \n
+            # Typically, this method is called via a scheduled
+            # procedure every couple of seconds when clustering is
+            # enabled.
+            #
+
+            set dynamic_peers [parameter::get \
+                                   -package_id $::acs::kernel_id \
+                                   -parameter DynamicClusterPeers]
+
+            if {!${:current_server_is_canonical_server}} {
+                #
+                # The current node might be a static or a dynamic
+                # peer. Do we have contact to the canonical server?
+                #
+                if {![:reachable ${:canonicalServerLocation}]} {
+                    #
+                    # We lost contact to the canonical server. This
+                    # is not a big problem for our server, since all
+                    # other peer-to-peer updates will continue to
+                    # work.
+                    #
+                    # During downtime of the canonical server,
+                    # scheduled procedures (e.g. mail delivery) will
+                    # be interrupted, and no new servers can
+                    # register.
+                    #
+                    ns_log warning "cluster node lost contact to" \
+                        "canonical server: ${:canonicalServerLocation}"
                }
-                ns_log notice "cluster: node configuration changed:\n$code"
-                ns_eval $code
+                #
+                # Are we a dynamic peer and not listed in
+                # DynamicClusterPeers? This might happen in
+                # situations, where the canonical server was
+                # restarted (or separated for a while).
+                #
+                if {[:current_server_is_dynamic_cluster_peer]
+                    && ${:currentServerLocation} ni $dynamic_peers
+                } {
+                    ns_log warning "cluster node is not listed in dynamic peers." \
\ + "Must re-join canonical server: ${:canonicalServerLocation}" + :send_join_request ${:canonicalServerLocation} + } } + + # + # Update cluster_peer_nodes if necessary + # + set oldConfig [lsort [nsv_get cluster cluster_peer_nodes]] + set newConfig [lsort [:peer_nodes $dynamic_peers]] + if {$newConfig ne $oldConfig} { + # + # The cluster configuration has changed + # + ns_log notice "cluster config changed:\nOLD $oldConfig\nNEW $newConfig" + nsv_set cluster cluster_peer_nodes $newConfig + } } - :public object method check_nodes {} { + :public method last_contact {location} { # - # For the time being (testing only) just measure some - # times from the canonical server with hardcoded locations + # Return the number of seconds since the last contact with + # the denoted server. If there is no data available, + # the return values is empty. # - if {[ad_canonical_server_p]} { - ns_log notice "-------check nodes" - ::acs::CS_127.0.0.1_8101 message set x ns_http - ::acs::CS_127.0.0.1_8444 message set x ns_https - ::acs::CS_127.0.0.1_8101 message -delivery connchan set x ns_connchan - ::acs::CS_127.0.0.1_8444 message -delivery connchan set x https-connchan - ::acs::CS_127.0.0.1_8101 message -delivery udp set x udp + if {[nsv_get cluster $location-last-contact clicksms]} { + return [expr {([clock clicks -milliseconds] - $clicksms)/1000.0}] } - # foreach node [::acs::Cluster info instances] { - # if {[$node require_connchan_channel]} { - # if {$node eq "::acs::CS_127.0.0.1_8101"} { - # #ns_log notice "[self] check_node $node is connected [$node cget -chan]" - # #ns_logctl severity Debug(connchan) on - # #ns_logctl severity Debug(request) on - # #ns_logctl severity Debug(ns:driver) on - # #ns_logctl severity Debug on - # $node connchan_message set ok 123 - # } - # } else { - # # - # # We see a warning message in the log file, when - # # the server cannot connect to the node. - # # - # #ns_log notice "[self] check_node $node is not connected" - # } - # } - set :to [::after 1000 [list [self] check_nodes]] + } + :public method last_request {location} { + # + # Return the number of seconds since the last request from + # the denoted server. If there is no data available, + # the return values is empty. + # + ns_log notice "check last-request <$location-last-request>" + if {[nsv_get cluster $location-last-request clicksms]} { + return [expr {([clock clicks -milliseconds] - $clicksms)/1000.0}] + } + } + :method reachable {location} { + #:log "reachable $location" + set d [ns_parseurl $location] + #ns_log notice "reachable: $location -> $d" + set result 0 + dict with d { + switch $proto { + "udp" { + # + # assume, udp is always reachable + # + set result 1 + } + "http" - + "https" { + # + # We can check via ns_connchan + # + try { + #ns_logctl severity Debug(connchan) on + ns_connchan connect $host $port + } on error {} { + # + # Not reachable, stick with the default 0 + # + } on ok {chan} { + set result 1 + ns_connchan close $chan + } + } + } + } + :log "node $location is reachable: $result" \ + "last_contact [:last_contact $location]" \ + "last_request [:last_request $location]" + if {$result} { + nsv_set cluster $location-last-contact [clock clicks -milliseconds] + } + return $result } - :public object method register_nodes {} { + :method is_current_server {location} { # - # Register the defined cluster nodes + # Check, if the provided location is the current server. + # We expect the that the method "setup" was already called. 
+            #
+            set result [expr {$location in ${:currentServerLocations}}]
+            #ns_log notice "is_current_server called with proto -> $location -> $result"
+            return $result
+        }
+        :method is_configured_server {locations} {
            #
-            # First delete the old cluster node objects
+            # Check, if one of the provided locations is in the
+            # currently configured cluster nodes.
            #
-            foreach node [::acs::Cluster info instances] {
-                $node destroy
+            foreach location $locations {
+                if {$location in ${:configured_cluster_hosts}} {
+                    return 1
+                }
            }
+            return 0
+        }
 
+        :method is_canonical_server {location} {
            #
-            # Base configuration values
+            # Check, if the provided location belongs to the
+            # canonical server specs. The canonical server might
+            # listen on multiple protocols, IP addresses and ports.
            #
-            set cluster_do_url [::acs::Cluster eval {set :url}]
-            set myConfig [server_cluster_my_config]
+            if { ${:canonicalServer} eq "" } {
+                ns_log Error "Your configuration is not correct for server clustering." \
+                    "Please ensure that you have the CanonicalServer parameter set correctly."
+                return 1
+            }
+            set result [expr {$location in ${:canonicalServerLocation}}]
+            #ns_log notice "is_canonical_server $location -> $result"
+            return $result
+        }
 
+        :public method current_server_is_canonical_server {} {
            #
-            # Create new cluster node objects. Iterate over all
-            # servers in the cluster and add Cluster objects for the
-            # ones, which are different from the current host (the
-            # peer hosts).
+            # Check, if the current server is the canonical server.
            #
-            foreach location [server_cluster_all_hosts] {
-                ns_log notice "creating ::acs::Cluster on $location"
-                try {
-                    server_cluster_get_config $location
-                } on ok {config} {
-                } on error {errorMsg} {
-                    ns_log notice "ignore $hostport (server_cluster_get_config returned $errorMsg)"
-                    continue
+            if { ${:canonicalServer} eq "" } {
+                ns_log Error "Your configuration is not correct for server clustering." \
+                    "Please ensure that you have the CanonicalServer parameter set correctly."
+                return 1
+            }
+            set result 0
+            foreach location ${:currentServerLocations} {
+                if {[:is_canonical_server $location]} {
+                    set result 1
+                    break
                }
-                dict with config {
-                    if {$host in [dict get $myConfig host]
-                        && $port in [dict get $myConfig port]
-                    } {
-                        ns_log debug "Cluster: server $host $port is no cluster peer"
-                        continue
-                    }
-                    # try {
-                    #     ns_logctl severity Debug(connchan) on
-                    #     ns_connchan connect $host $port
-                    # } on error {} {
-                    #     ns_log notice "Cluster: server $host $port is not available"
-                    #     continue
-                    # } on ok {chan} {
-                    #     ns_connchan close $chan
-                    # }
+            }
+            #:log "current_server_is_canonical_server $result"
+            return $result
+        }
 
-                    # ns_log debug "Cluster: server $host $port is an available cluster peer"
-                    ns_log notice "call create ::acs::Cluster create CS_${host}_${port}"
+        :method current_server_is_dynamic_cluster_peer {} {
+            #
+            # We are a dynamic cluster peer, when we are neither the
+            # canonical server nor listed in the static server
+            # locations.
+            #
+            if {${:current_server_is_canonical_server}} {
+                return 0
+            }
+            return [expr {${:currentServerLocation} ni ${:staticServerLocations}}]
+        }
 
-                    ::acs::Cluster create CS_${host}_${port} \
-                        -proto $proto \
-                        -host $host \
-                        -port $port \
-                        -url $cluster_do_url
+        :method qualified_location {location} {
+            #
+            # Return a canonical representation of the provided
+            # location, where the DNS name is resolved and the
+            # protocol and port are always included. When there is no
+            # protocol provided, HTTP is assumed. There is no default
+            # provided for non-HTTP* locations.
+            #
+            # In theory, an input location might map to multiple
+            # values, when e.g. a provided DNS name refers to
+            # multiple IP addresses. For now, we always return a
+            # single value.
+            #
+            set d {port 80 proto http}
+            if {[regexp {^([^:]+)://} $location . proto]} {
+                if {$proto eq "https"} {
+                    set d {port 443 proto https}
+                }
+                set d [dict merge $d [ns_parseurl $location]]
+                dict unset d tail
+                dict unset d path
+            } else {
+                set d [dict merge $d [ns_parsehostport $location]]
+            }
+            #
+            # To return all IP addresses, we could use "ns_addrbyhost
+            # -all ..." instead.
+            #
+            dict set d host [ns_addrbyhost [dict get $d host]]
+            dict with d {
+                set result [util::join_location -proto $proto -hostname $host -port $port]
+            }
+            return $result
        }
-        :method name {} {
-            return ${:proto}://${:host}:${:port}
+
+        :method preferred_location {locations:1..n} {
+            #
+            # Return the preferred location.
+            #
+            set preferred_location_regexp [parameter::get \
+                                               -package_id $::acs::kernel_id \
+                                               -parameter PreferredLocationRegexp \
+                                               -default https://]
+
+            set preferred_location ""
+            foreach location $locations {
+                if {[regexp $preferred_location_regexp $location]} {
+                    set preferred_location $location
+                    break
+                }
+            }
+            if {$preferred_location eq ""} {
+                set preferred_location [lindex $locations 0]
+            }
+            return $preferred_location
        }
-        :public method require_connchan_channel {} {
+
+        :method current_server_locations {
+            {-network_drivers {nssock nsssl nsudp}}
+        } {
            #
+            # Return a list of valid locations of the current server.
            #
+            # Since "ns_driver info" is not yet available at the time
+            # the *-init files are loaded, this method goes a long
+            # way to check the properties of all loaded modules.
+            # Network drivers with an empty "port" or port == 0 are
+            # ignored.
            #
-            if {![info exists :chan]} {
-                set tlsOption [expr {${:proto} in {https} ? "-tls" : ""}]
-                try {
-                    set :retry 0
-                    ns_connchan connect -timeout 10ms {*}$tlsOption ${:host} ${:port}
-                } on ok {result} {
-                    set :chan $result
-                    ns_log notice "-cluster: [:name] connected - channel ${:chan}"
-                } on error {errorMsg} {
-                    ns_log warning "-cluster: [:name] can not connect"
+            set result {}
+            set protos {nssock http nsssl https nsudp udp nscoap coap}
+            set module_file_regexp [join [dict keys $protos] |]
+
+            foreach module_section [list ns/server/[ns_info server]/modules ns/modules] {
+                set modules [ns_configsection $module_section]
+                if {$modules ne ""} {
+                    foreach {module file} [ns_set array $modules] {
+                        #
+                        # To obtain independence of the driver name,
+                        # we check whether the name of the binary
+                        # (*.so or *.dylib) is one of the supported
+                        # driver modules.
+                        #
+                        if {![regexp ($module_file_regexp) $file . module_type]} {
+                            continue
+                        }
+
+                        #ns_log notice "current_server_locations: use module <$module> $file"
+                        set driver_section [ns_driversection -driver $module]
+                        foreach ip [ns_config $driver_section address] {
+                            foreach port [ns_config -int $driver_section port] {
+                                if {$port == 0} {
+                                    continue
+                                }
+                                lappend result [util::join_location \
+                                                    -proto [dict get $protos $module_type] \
+                                                    -hostname $ip \
+                                                    -port $port]
+                            }
+                        }
+                    }
                }
            }
-            return [info exists :chan]
+            set result [lsort -unique $result]
+            ns_log notice "current_server_locations returns $result"
+            return $result
        }
-        :public method has_channel {} {
-            return [info exists :chan]
-        }
-        :method connchan_retry_message {args} {
+
+        :public method send_join_request {location} {
            #
-            # Make a single retry to send an HTTP message to this node
-            # and return its full HTTP response on success.
+            # Send a join request to the canonical server.
            #
+            :log "send_join_request to $location"
+            set r [:send $location [self] join_request ${:currentServerLocation}]
+            #:log "... join_request returned $r"
+            if {[dict exists $r body]} {
+                #
+                # During startup/separation caches might not be in
+                # sync. Therefore, we have lost confidence in our
+                # caches and clear these.
+                #
+                :log "send_join_request returned $body, flushing all my caches"
+                acs::cache_flush_all
+            }
+        }
+
+        :public method join_request {peerLocation} -returns boolean {
            #
-            # Cleanup old connection
+            # A join request was received
            #
-            try {
-                ns_connchan close ${:chan}
-            } on error {errorMsg} {
-                ns_log notice "... connchan ${:chan} CLOSE returns error $errorMsg, giving up"
-                return
+            ns_log notice "Cluster join_request from '$peerLocation'"
+            set success 1
+            #
+            # Was the join request received by a canonical server?
+            #
+            if {![:current_server_is_canonical_server]} {
+                ns_log warning "Cluster join_request rejected," \
+                    "since it was received by a non-canonical server"
+                set success 0
+            } else {
+                #
+                # We know, we are running on the canonical server, and we
+                # know that the request is trustworthy.
+                #
+                ns_log notice "Cluster join_request $peerLocation accepted from $peerLocation"
+                set dynamicClusterNodes [parameter::get -package_id $::acs::kernel_id -parameter DynamicClusterPeers]
+                set dynamicClusterNodes [lsort -unique [concat $dynamicClusterNodes $peerLocation]]
+                #
+                # The parameter::set_value operation is broadcasted to all cluster nodes.
+                #
+                parameter::set_value -package_id $::acs::kernel_id -parameter DynamicClusterPeers -value $dynamicClusterNodes
+                ns_log notice "Cluster join_request leads to DynamicClusterPeers $dynamicClusterNodes"
            }
-            unset -nocomplain :chan
+            return $success
+        }
+
+        :method peer_nodes {dynamic_peers} {
            #
-            # Create at new connection, but notice retry mode to avoid
-            # endless retries for one message
+            # Determine the peer nodes of the server cluster. These
+            # are cluster nodes which will receive intra-server
+            # commands.
            #
-            #ns_log notice "... connchan ${:chan} CLOSED"
-            if {[:require_connchan_channel]} {
-                set :retry 1
-                ns_log notice "-cluster: [self] connchan RETRY channel ${:chan}"
-                :connchan_message {*}$args
+            set :configured_cluster_hosts {}
+            set peer_nodes {}
+            foreach location [server_cluster_all_hosts] {
+                #
+                # Since the input may stem from erroneous user input,
+                # use "try" to ease debugging.
+                #
+                try {
+                    :qualified_location $location
+                } on ok {qualified_location} {
+                    lappend :configured_cluster_hosts $qualified_location
+                } on error {errorMsg} {
+                    ns_log notice "ignore $location (:qualified_location returned $errorMsg)"
+                    continue
+                }
+                if {[:is_current_server $qualified_location]} {
+                    #:log "$qualified_location is the current server"
+                    continue
+                }
+                #
+                # For dynamic cluster peers, check the reachability
+                #
+                if {$qualified_location in $dynamic_peers
+                    && ![:reachable $qualified_location]
+                } {
+                    ns_log warning "cluster node lost contact to dynamic cluster peer: $qualified_location"
+                    continue
+                }
+
+                lappend peer_nodes $qualified_location
            }
+            return $peer_nodes
        }
-        :method connchan_message {args} {
+
+        :public method register_nodes {{-startup:switch false}} {
            #
-            # Send an HTTP message to this node and return its full HTTP
-            # response on success.
+            # Register the defined cluster nodes by
+            # creating/recreating cluster node objects.
            #
-            set reply ""
-            #set t0 [clock clicks -microseconds]
-            if {[:require_connchan_channel]} {
-                set message "GET /${:url}?cmd=[ns_urlencode $args] HTTP/1.1\r\nHost:localhost\r\n\r\n"
-                #ns_log notice "-cluster: send $message to ${:proto}://${:host}:${:port}"
+            :log ":register_nodes startup $startup"
 
-                try {
-                    ns_connchan write ${:chan} $message
-                    #set t2 [clock clicks -microseconds]
-                    #ns_log notice "... message sent"
-                    set reply [ns_connchan read ${:chan}]
-                    #set t3 [clock clicks -microseconds]
+            #
+            # Set base configuration values
+            #
+            set dynamic_peers [parameter::get -package_id $::acs::kernel_id -parameter DynamicClusterPeers]
 
-                    #ns_log notice "... reply $reply"
-                } on error {errorMsg} {
-                    #ns_log notice "-cluster: send $args to ${:proto}://${:host}:${:port} returned ERROR $::errorInfo $errorMsg"
-                    ns_log notice "-cluster: send connchan ${:chan} error $errorMsg RETRY ${:retry}"
-                    if {${:retry} == 0} {
-                        set reply [:connchan_retry_message {*}$args]
+            #
+            # At startup, when we are running on the canonical server,
+            # check, whether the existing DynamicClusterPeers are
+            # still reachable. When the canonical server is started
+            # before the other cluster nodes, this parameter should be
+            # empty. However, when the canonical server is restarted,
+            # there might be some of the peer nodes already active.
+            #
+            if {$startup
+                && ${:current_server_is_canonical_server}
+                && $dynamic_peers ne ""
+            } {
+                #
+                # When we are starting the canonical server, it resets
+                # the potentially pre-existing dynamic nodes unless
+                # these are reachable.
+                #
+                set old_peer_locations $dynamic_peers
+                :log "canonical server starts with existing DynamicClusterPeers nodes: $old_peer_locations"
+                #
+                # Keep the reachable cluster nodes in
+                # "DynamicClusterPeers".
+                #
+                set new_peer_locations {}
+                foreach location $old_peer_locations {
+                    if {[:reachable $location]} {
+                        lappend new_peer_locations $location
                    }
-                } on ok {result} {
-                    set :retry 0
-                    #ns_log notice "-cluster: [:name] sent OK " \
-                        "total [expr {([clock clicks -microseconds] - $t0)/1000.0}]ms" \
-                        "write [expr {($t2 - $t0)/1000.0}]ms" \
-                        "read [expr {($t3 - $t2)/1000.0}]ms" \
                }
+                if {$new_peer_locations ne $old_peer_locations} {
+                    #
+                    # Update the DynamicClusterPeers in the database
+                    # such that the other nodes will pick it up as
+                    # well.
+                    #
+                    :log "updating DynamicClusterPeers to $new_peer_locations"
+                    parameter::set_value -package_id $::acs::kernel_id -parameter DynamicClusterPeers \
+                        -value $new_peer_locations
+                    set dynamic_peers $new_peer_locations
+                }
+            }
 
+            #
+            # Determine the peer nodes.
+            #
+            set cluster_peer_nodes [:peer_nodes $dynamic_peers]
+            nsv_set cluster cluster_peer_nodes $cluster_peer_nodes
+
+            if {![:is_configured_server ${:currentServerLocations}]} {
+                #
+                # Current node is not pre-registered.
+                #
+                ns_log notice "Current host ${:currentServerLocation} is not included in ${:configured_cluster_hosts}"
+                if {![:current_server_is_canonical_server]} {
+                    ns_log notice "... must join at canonical server ${:canonicalServerLocation}"
+                    :send_join_request ${:canonicalServerLocation}
+                }
+            } else {
+                #ns_log notice "Current host ${:currentServerLocation} is included in ${:configured_cluster_hosts}"
            }
-            return $reply
        }
-        :method ns_http_message args {
-            #:log "--cluster outgoing request to ${:proto}://${:host}:${:port} // $args"
-            try {
-                ns_http run ${:proto}://${:host}:${:port}/${:url}?cmd=[ns_urlencode $args]
-            } on error {errorMsg} {
-                ns_log warning "-cluster: send message to ${:proto}://${:host}:${:port}/${:url}?cmd=[ns_urlencode $args] failed: $errorMsg"
-                set result ""
-            } on ok {result} {
-                #ns_log notice "-cluster: response $result"
+
+        :public method secret_configured {} {
+            #
+            # Check, whether the secret for signing messages in the
+            # intra-cluster talk is configured.
+            #
+            # More checks for different secret definition methods
+            # might be added.
+            #
+            set secret [:secret]
+            return [expr {$secret ne ""}]
+        }
+
+        :method secret {} {
+            #
+            # Return secret used for signing messages
+            #
+            return [ns_config ns/server/[ns_info server]/acs ClusterSecret]
+        }
+
+        #
+        # Methods for message encoding/decoding
+        #
+        :method "message sign" {message} {
+            #
+            # Return signature for message
+            #
+            #:log "message sign: $message"
+            return [ns_crypto::hmac string -digest sha256 [:secret] $message]
+        }
+
+        :method "message verify" {message signature} {
+            #
+            # Verify if the signature of the message is ok and return
+            # boolean value.
+            #
+            #:log "message verify {$message $signature}"
+            set local_signature [ns_crypto::hmac string -digest sha256 [:secret] $message]
+            return [expr {$local_signature eq $signature}]
+        }
+
+        :method "message encode" {cmd} {
+            set timestamp [clock clicks -milliseconds]
+            append result \
+                cmd=[ns_urlencode $cmd] \
+                &f=[ns_urlencode ${:currentServerLocation}] \
+                &t=$timestamp \
+                &s=[:message sign [list $cmd $timestamp]]
+        }
+
+        :method "message decode" {} {
+            #
+            # Return a dict of the decoded message
+            # TODO: add timestamp?
+            #
+            dict set r cmd [ns_queryget cmd]
+            dict set r peer [ns_queryget f]
+            dict set r timestamp [ns_queryget t]
+            dict set r signature [ns_queryget s]
+            dict set r peeraddr [ns_conn peeraddr]
+            dict with r {
+                if {![:message verify [list $cmd $timestamp] $signature]} {
+                    error "received message from $peeraddr does not match signature: $r"
+                }
+            }
-            return $result
+            return $r
        }
-        :method udp_message args {
-            #:log "--cluster outgoing request to ${:proto}://${:host}:${:port} // $args"
-            try {
-                ns_udp ${:host} ${:port} "GET /${:url}?cmd=[ns_urlencode $args] HTTP/1.0\n\n"
-            } on error {errorMsg} {
-                ns_log warning "-cluster: send message to ${:proto}://${:host}:${:port}/${:url}?cmd=[ns_urlencode $args] failed: $errorMsg"
-                set result ""
-            } on ok {result} {
-                #ns_log notice "-cluster: response $result"
-            }
-            return $result
-        }
-        :public method message {{-delivery ns_http} args} {
+
+        #
+        # Methods for message delivery
+        #
+        :public method send {{-delivery ns_http} location args} {
            #
-            # Send a command by different means to the node server for
-            # intra-server talk.
+            # Send a command by different means to the cluster node
+            # for intra-server talk.
            #
            # Valid delivery methods are
            #  - ns_http (for HTTP and HTTPS)
            #  - connchan (for HTTP and HTTPS)
            #  - udp (plain UDP only)
            #
-            #:log "--cluster outgoing request to [:name] // $args"
+            :log "outgoing request to $location // $args"
            set t0 [clock clicks -microseconds]
            switch $delivery {
-                ns_http -
-                connchan -
-                udp {set result [:${delivery}_message {*}$args]}
+                #connchan -
+                #udp -
+                ns_http {set result [:${delivery}_send $location {*}$args]}
                default {error "unknown delivery method '$delivery'"}
            }
-            ns_log notice "-cluster: [:name] $args sent" \
+            ns_log notice "-cluster: $location $args sent" \
                "total [expr {([clock clicks -microseconds] - $t0)/1000.0}]ms"
            return $result
        }
+
+        :method ns_http_send {location args} {
+            #:log "outgoing ns_http request to $location // $args"
+            try {
+                ns_http run $location/${:url}?[:message encode $args]
+            } on error {errorMsg} {
+                ns_log warning "-cluster: send message to $location/${:url}?cmd=[ns_urlencode $args] failed: $errorMsg"
+                set result ""
+            } on ok {result} {
+                #ns_log notice "-cluster: response $result"
+            }
+            return $result
+        }
    }
+
+    #
+    # Define the acs::cluster object, since this is used e.g. in
+    # "acs::clusterwide", which is used quite early during boot.
+    #
+    acs::Cluster create ::acs::cluster
}
Index: openacs-4/packages/acs-tcl/tcl/server-cluster-procs.tcl
===================================================================
RCS file: /usr/local/cvsroot/openacs-4/packages/acs-tcl/tcl/server-cluster-procs.tcl,v
diff -u -r1.10.2.6 -r1.10.2.7
--- openacs-4/packages/acs-tcl/tcl/server-cluster-procs.tcl	14 Jun 2022 17:59:36 -0000	1.10.2.6
+++ openacs-4/packages/acs-tcl/tcl/server-cluster-procs.tcl	7 Feb 2023 17:50:32 -0000	1.10.2.7
@@ -6,7 +6,11 @@
    @creation-date 7 Mar 2000
}
 
-ad_proc server_cluster_enabled_p {} { Returns true if clustering is enabled. } {
+ad_proc server_cluster_enabled_p {} {
+
+    Returns true if clustering is enabled.
+
+} {
    return [parameter::get \
                -package_id $::acs::kernel_id \
                -parameter ClusterEnabledP \
@@ -15,114 +19,40 @@
 
 ad_proc server_cluster_all_hosts {} {
 
-    Returns a list of all hosts, possibly including this host, in the
-    server cluster.
+    Returns a list of all hosts in the server cluster, possibly
+    including the current host.
 
} {
    if { ![server_cluster_enabled_p] } {
        return {}
    }
-    return [parameter::get -package_id $::acs::kernel_id -parameter ClusterPeerIP]
-}
+    #
+    # For now, include the CanonicalServer as well in the all_hosts
+    # list, since this eases the configuration. Later, we might want to
+    # have a canonical server, which is not a worker node, so it would
+    # not need to receive all the cache-flush operations.
+    #
+    set nodes [lsort -unique [concat \
+                                  [parameter::get -package_id $::acs::kernel_id -parameter CanonicalServer] \
+                                  [parameter::get -package_id $::acs::kernel_id -parameter ClusterPeerIP] \
+                                  [parameter::get -package_id $::acs::kernel_id -parameter DynamicClusterPeers] ]]
 
-ad_proc server_cluster_peer_hosts {} {
-
-    Returns a list of all hosts, excluding this host, in the server cluster.
-
-} {
-    return [lmap cluster_server [::acs::Cluster info instances] {
-        util::join_location \
-            -hostname [$cluster_server cget -host] \
-            -port [$cluster_server cget -port]
-    }]
+    #ns_log notice "server_cluster_all_hosts returns <$nodes>"
+    return $nodes
}
 
-ad_proc server_cluster_authorized_p { ip } {
-
-    Can a request coming from $ip be a valid cluster request, i.e.,
-    matches some value in ClusterAuthorizedIP or is 127.0.0.1?
-
-} {
-    if { ![server_cluster_enabled_p] } {
-        return 0
-    }
-
-    if { $ip == "127.0.0.1" } {
-        return 1
-    }
-
-    foreach glob [parameter::get -package_id $::acs::kernel_id -parameter ClusterAuthorizedIP] {
-        if { [string match $glob $ip] } {
-            return 1
-        }
-    }
-    return 0
-}
-
-ad_proc -private server_cluster_my_config {} {
-} {
-    set driver_section [ns_driversection -driver nssock]
-    set my_ips [ns_config $driver_section address]
-    set my_ports [ns_config -int $driver_section port]
-    return [list host $my_ips port $my_ports]
-}
-
-ad_proc -private server_cluster_get_config {location} {
-    Return a dict parsed from the host and port spec.
-    If no port is specified, it defaults to 80.
-    If no scheme is specified, it defaults to "http".
-    In case the hostname is provided as an DNS name, it is resolved.
-
-    @param location location (e.g., https://localhost:8443) or just host with optional port
-    @return dict containing proto, host, and port
-} {
-    set d {port 80 proto http}
-    if {[regexp {^([^:]+)://} $location . proto]} {
-        if {$proto eq "https"} {
-            set d {port 443 proto https}
-        }
-        set d [dict merge $d [ns_parseurl $location]]
-        dict unset d tail
-        dict unset d path
-    } else {
-        set d [dict merge $d [ns_parsehostport $location]]
-    }
-    dict set d host [ns_addrbyhost [dict get $d host]]
-    return $d
-}
-
 ad_proc -private ad_canonical_server_p {} {
 
    Returns true if this is the primary (called historically
    "canonical") server, false otherwise.
 
-    Since the server can listen to multiple IP addresses and on
-    multiple ports, all of these have to be checked.
+    This function is used, e.g., to determine whether scheduled
+    procedures are run on the current node.
+
+    @return boolean value
 
} {
-    set canonical_server [parameter::get -package_id $::acs::kernel_id -parameter CanonicalServer]
-    if { $canonical_server eq "" } {
-        ns_log Error "Your configuration is not correct for server clustering." \
-            "Please ensure that you have the CanonicalServer parameter set correctly."
-        return 1
-    }
-    set myConfig [server_cluster_my_config]
-    set canonicalConfig [server_cluster_get_config $canonical_server]
-    #
-    # Both, myConfig and canonicalConfig can contain multiple IP
-    # addressen and ports.
-    #
-    foreach my_ip [dict get $myConfig host] {
-        foreach my_port [dict get $myConfig port] {
-            dict with canonicalConfig {
-                if {$my_ip in $host && $my_port in $port} {
-                    return 1
-                }
-            }
-        }
-    }
-    return 0
+    return [::acs::cluster current_server_is_canonical_server]
}
 
# Local variables:
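
Note on the signing scheme: the "message sign", "message encode", and "message verify" methods added in cluster-procs.tcl above are small enough to sketch in isolation. The following is a minimal, self-contained illustration of the sign/encode/verify round trip, not part of the patch; the names cluster_sign, cluster_verify, secret, peer_location, and demo_cmd are made up for the example, while ns_crypto::hmac, ns_urlencode, and the query fields cmd/f/t/s are taken from the patch itself.

    # Hypothetical round trip of the intra-cluster message signing.

    set secret        "please-change-me"        ;# stands in for the ClusterSecret value
    set peer_location "http://10.0.0.2:8000"    ;# hypothetical sending node
    set demo_cmd      {acs::cache_flush_all}    ;# one of the allowed commands

    proc cluster_sign {secret cmd timestamp} {
        #
        # Same construction as "message sign": HMAC-SHA256 over the
        # Tcl list of command and timestamp.
        #
        return [ns_crypto::hmac string -digest sha256 $secret [list $cmd $timestamp]]
    }

    # Sender side ("message encode"): build the query string with
    # command (cmd), origin (f), timestamp (t), and signature (s).
    set t [clock clicks -milliseconds]
    set query "cmd=[ns_urlencode $demo_cmd]&f=[ns_urlencode $peer_location]&t=$t&s=[cluster_sign $secret $demo_cmd $t]"

    proc cluster_verify {secret cmd timestamp signature} {
        #
        # Receiver side ("message decode"/"message verify"): recompute
        # the HMAC from the received cmd and t and compare with s.
        #
        expr {[cluster_sign $secret $cmd $timestamp] eq $signature}
    }

Any change to cmd or t on the wire makes the recomputed HMAC differ from s, so a request signed with a different (or missing) secret is rejected in "message decode" before the command is ever considered for execution.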
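The secret itself is read by the "secret" method via ns_config from the acs section of the server's configuration file, so every node has to define it there. A minimal configuration sketch, assuming the server is named "openacs" and using a placeholder value (NaviServer configuration files are themselves Tcl):

    # Sketch of a NaviServer config-file section providing the shared
    # cluster secret; "openacs" stands in for the actual server name,
    # and the value must be identical on all cluster nodes.
    ns_section ns/server/openacs/acs
    ns_param ClusterSecret "please-change-me"

The other knobs referenced by the patch (ClusterEnabledP, CanonicalServer, ClusterPeerIP, DynamicClusterPeers, PreferredLocationRegexp) are acs-kernel package parameters rather than configuration-file entries; only the secret lives in the configuration file, which keeps it out of the database that all nodes share.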