Index: openacs-4/packages/acs-mail-lite/sql/postgresql/acs-mail-lite-create.sql =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/acs-mail-lite/sql/postgresql/acs-mail-lite-create.sql,v diff -u -r1.13 -r1.14 --- openacs-4/packages/acs-mail-lite/sql/postgresql/acs-mail-lite-create.sql 27 Oct 2014 16:39:40 -0000 1.13 +++ openacs-4/packages/acs-mail-lite/sql/postgresql/acs-mail-lite-create.sql 17 Feb 2018 17:08:31 -0000 1.14 @@ -65,3 +65,360 @@ notification_time timestamptz default current_timestamp, notification_count integer default 0 ); + +-- +-- Add Incoming Mail Processing +-- +create sequence acs_mail_lite_in_id_seq; + +-- New tables + +-- table tracking incoming email +create table acs_mail_lite_from_external ( + aml_email_id integer primary key + not null + DEFAULT nextval ('acs_mail_lite_id_seq'), + -- Priority for processing incoming email in queue. + -- Lower number processed first. + priority integer, + -- using varchar instead of text for indexing + -- to and from email are defined according to headers. + -- See table acs_mail_lite_ie_headers + to_email_addrs varchar(1000), + from_email_addrs text, + subject text, + -- see acs_mail_lite_send_msg_id_map.msg_id + msg_id bigint, + -- used by prioritization calculations + -- For IMAP4 this is size defined by rfc822 + size_chars numeric, + -- time email received from server in seconds since tcl epoch + received_cs bigint, + -- Answers question: + -- Has all ACS Mail Lite processes finished for this email? + -- Processes like parsing email, bounced email, input validation + processed_p boolean, + -- Answers question: + -- Have all callbacks related to this email finished processing? + -- Upon release, delete all components of aml_email_id also from + -- tables acs_mail_lite_ie_headers, acs_mail_lite_ie_body_parts, and + -- acs_mail_lite_ie_files. + -- Release essentially means its available to be deleted. + release_p boolean +); + +create index acs_mail_lite_from_external_aml_email_id_idx + on acs_mail_lite_from_external (aml_email_id); +create index acs_mail_lite_from_external_processed_p_idx + on acs_mail_lite_from_external (processed_p); +create index acs_mail_lite_from_external_release_p_idx + on acs_mail_lite_from_external (release_p); + + + +-- Some services are offered between sessions of importing incoming email. +-- A unique ID provided by +-- acs_mail_lite_email_uid_id_map.uid_ext +-- is designed to +-- support UIDs for each email that are consistent between import sessions +-- from external source, such as specified by IMAP4 rfc3501 +-- https://tools.ietf.org/html/rfc3501 +-- It is also expected that each mailbox.host, mailbox and user are +-- consistent for duration of the service. +-- And yet, experience knows that sometimes email servers change +-- and UIDs for an email change with it. +-- Users switching email servers of an email account using a IMAP4 client +-- might hassle with moving email, but +-- in the process they generally know what is happening. They don't re-read +-- all the email. +-- We want to avoid this server re-reading and processing email +-- that has already been processed, when the UID of emails change. +-- The Questions become: + +-- What scenarios might we run into? +-- Another user reseting flags. +-- A server migration or restore with some conflicting UIDs. + +-- Can we recognize a change in server? +-- If so, can we signal ACS Mail Lite to ignore existing email +-- in a new environment? +-- Also, we should have a manual override to not ignore or ignore +-- in case of false positive and false negative triggers. + +-- Can we recognize if another user accesses the same email account +-- and arbitrarily selects some prior messages to unread? +-- Yes. The purpose of acs_mail_lite_email_uid_id_map is to act as a log +-- of prior processed messages. +-- If total new messages is a significant percentage of all messages +-- and service has been working for a week or more, +-- use statistics to generate a reasonable trigger. +-- Weekly produce a revised distribution curve of weekly counts of incoming. +-- If percentile is over 200%.. ie twice the weekly maximum.. +-- Also, test, for example, if new message count is less than +-- prior total, there's more of a chance that they are new messages; +-- Maybe check for one or two duplicates. +-- If new message count is over the total prior messsage count, flag a problem. +-- rfc3501 2.3.1.1. ..A client can only assume.. at the time +-- that it obtains the next unique identifier value.. that +-- messages arriving after that time will have a UID greater +-- than or equal to that value... + + +-- Can we recognize a change in server? +-- rfc3501 does not specify a unqiue server id +-- It specifies a unique id for a mailbox: UIDVALIDITY +-- UIDVALIDITY is optional, quite useful. +-- Rfc3501 specifies a unique id for each email: UID. +-- We can assign each email a more unique reference: +-- mailbox.host + mailbox.name + UIDVALIDITY (of mailbox) + UID. +-- We are more specific so that we detect more subtle cases of +-- server change, where checks by UID and UIDVALIDITY may not. + + +-- For example, when migrating email service and +-- and the new system initially restores the UIVALIDITY and message UID, +-- but references a different state of each email. The cause +-- of such cases are reasonable. For example, restoring +-- from backup to a new email host or restoring +-- before some batch event changed a bunch of things. So, +-- src_ext = mailbox.host + (user?) + mailbox.name + UIDVALIDITY +-- Leave user out for now.. +-- Priority is to have a robust way to ignore +-- prior messages recognized as 'new' messages. + +create table acs_mail_lite_email_uid_id_map ( + -- unqique internal inbound email id + -- src_ext_id identifies source, but is redundant + -- for identifying a unique email. + aml_email_id integer not null, + --uisng varchar instead of text for indexing purposes + -- Each UID externally defined such as from imap4 server + uid_ext varchar(3000) not null, + -- Each external source may apply a different uid. + -- This is essentialy an arbitrary constant frame reference between + -- connecting sessions with external server in most scenarios. + -- For IMAP4v1 rfc3501 2.3.1.1. item 4 ..combination of + -- mailbox.name, UIDVALIDITY, and UID must refer to a single + -- immutable message on that server forever. + -- default is: + -- ExternalSource parameter mailbox.name + -- and UIDVALIDITY with dash as delimiter + -- where ExternalSource parameter is + -- either blank or maybe mailbox.host for example. + -- external source reference id + -- see acs_mail_lite_email_src_ext_id_map.aml_src_id + src_ext_id integer +); + +create index acs_mail_lite_email_uid_id_map_uid_ext_idx + on acs_mail_lite_email_uid_id_map (uid_ext); +create index acs_mail_lite_email_uid_id_map_src_ext_id_idx + on acs_mail_lite_email_uid_id_map (src_ext_id); + +create table acs_mail_lite_email_src_ext_id_map ( + aml_src_id integer not null, + src_ext varchar(1000) not null +); + + + +-- Packages that are services, such as ACS Mail Lite, do not have a web UI. +-- Scheduled procs cannot read changes in values of package parameters +-- or get updates via web UI connections, or changes in tcl via apm. +-- Choices are updates via nsv variables and database value updates. +-- Choices via database have persistence across server restarts. +-- Defaults are set in acs_mail_lite::sched_parameters +-- These all are used in context of processing incoming email +-- unless stated otherwise. +-- Most specific flag takes precedence. +-- If an email is flagged high priority by package_id and +-- low priority by subject glob. It is assigned low priority. +-- Order of specificity. +-- medium default package_id party_id subject_id object_id +-- party_id can be group_id or user_id +-- If fast and low flag the same specificity for an email, low is chosen. +create table acs_mail_lite_ui ( + -- scan_replies_est_dur_per_cycle_s_override see www/doc/analysis-notes + sredpcs_override integer, + -- Answers question: Reprocess old email? + reprocess_old_p boolean, + -- Max number of concurrent threads for high priority processing + max_concurrent integer, + -- Any incoming email body part over this size is stored in file + -- instead of database. + max_blob_chars integer, + -- Minimum threashold for default medium (standard) priority + mpri_min integer, + -- Maximum value for default medium (standard) priority + mpri_max integer, + --space delimited list of package_ids to process at fast/high priority + hpri_package_ids text, + --space delimited list of package_ids to process at low priority + lpri_package_ids text, + --space delimited list of party_ids to process at fast/high priority + hpri_party_ids text, + --space delimited list of party_ids to process at low priority + lpri_party_ids text, + -- a glob for searching subjects to flag for fast/high priority + hpri_subject_glob text, + -- a glob for searching subjects to flag for low priority + lpri_subject_glob text, + --space delimited list of object_ids to process at fast/high priority + hpri_object_ids text, + --space delimited list of object_ids to process at low priority + lpri_object_ids text, + --filters to reject input as early as possible in processing inbound + --Each filter is a name value in standard tcl list format + --where name is a header name + reject_on_hit text, + reject_on_miss text +); + + +-- This table has similar requirements to acs_mail_lite_ui +-- proc acs_mail_lite_imap_conn_* needs to be able to update values +-- within scheduled procs without restarting server. +-- Port is ignored. Added because it is a common requirement of connections +-- that might one day be useful here, too. +create table acs_mail_lite_imap_conn ( + -- mailbox.host + ho text, + -- you guessed it + pa text, + -- port + po integer, + --timeout + ti integer, + -- user + us text, + -- mailbox.name See nsimap documentation for definition + na text, + -- space separated list of flags for imap related modifications + -- ssl means connect via ssl. + -- novalidatecert means accept a self-signed certificate + fl text +); + + +-- Following tables store parsed incoming email for processing by callbacks +-- defined in the rest of OpenACS + +-- incoming email headers +-- There should be a size limit per unit time from each source +-- to prevent DDOS attacks and such (at least to the imap system). +-- +create table acs_mail_lite_ie_headers ( + -- incoming email + -- only includes headers useful in processing the queue + -- Such as + -- size + -- from + aml_email_id integer, + -- header name, one header per row + -- For all headers together, see acs_mail_lite_ie_parts.c_type=headers + -- Special case: h_name = struct means + -- h_value contains entire value returned from ns_imap struct + -- as a tcl list + h_name text, + h_value text +); + +create index acs_mail_lite_ie_headers_aml_email_id_idx + on acs_mail_lite_ie_headers (aml_email_id); + +-- incoming email body parts +-- including email file attachments and file content +-- A part may be a filename. The filename data model is added +-- to the parts table to reduce code complexity. +-- An attached or inline file is a kind of part. +create table acs_mail_lite_ie_parts ( + aml_email_id integer, + section_id integer, + + -- In addition to content_type, there is a special case: + -- headers, which contains all headers for email + -- content_type = c_type + c_type text, + -- If type has a filename, this is original filename. + filename text, + -- If c_type is multipart, content is blank. part_id is branched. + content text, + -- An alternate filepathname for large blob, or + -- A local absolute filepath location + c_filepathname text +); + +create index acs_mail_lite_ie_parts_aml_email_id_idx + on acs_mail_lite_ie_parts (aml_email_id); + + +-- incoming email parts, name value pairs of +create table acs_mail_lite_ie_part_nv_pairs ( + aml_email_id integer, + -- Usage is same as acs_mail_lite_ie_parts.section_id + section_id integer, + -- name value pair + p_name text, + p_value text +); + +create index acs_mail_lite_ie_part_nv_pairs_aml_email_id_idx + on acs_mail_lite_ie_part_nv_pairs (aml_email_id); + +create table acs_mail_lite_ie_section_ref_map ( + -- 'Section' refers to usage with 'part' reference in 'ns_imap body' + -- Email parts can contain multiple parts. + -- Each multiple part can contain multiple parts. + + -- Section_ref is an absolute reference of a part + -- including the parts it is contained in, and + -- delimited by period. + -- It is defined by: + -- ns_imap body #s msgno part + -- And yet, this reference system holds for any email + -- storage, so is adopted for generic use as well. + + -- Default reference is value of 1. + -- A two part message has values 1 and 2. + -- Part 2 of a 3 part email (2/3) has reference '2' + -- If part 2 is also multiple parts, then + -- part 1 of part 2 of email has reference '2.1' and so on. + + -- Mapping is constant for each case. + -- For example, '1.2.2.1' will always point to the same integer. + -- So do not alter values as they are likely used by + -- multiple emails. + + + section_ref varchar(300), + section_id integer +); + +create index acs_mail_lite_ie_section_ref_map_section_ref_idx + on acs_mail_lite_ie_section_ref_map (section_ref); +create index acs_mail_lite_ie_section_ref_map_section_id_idx + on acs_mail_lite_ie_section_ref_map (section_id); + +-- +create table acs_mail_lite_send_msg_id_map ( + -- a randomized number unique to this table + -- unique not null + msg_id text primary key, + package_id integer + constraint aml_package_id_fk + references apm_packages, + party_id integer + constraint aml_from_external_party_id_fk + references parties (party_id), + object_id integer + constraint aml_from_external_obect_id_fk + references acs_objects (object_id), + -- Indicate approximate time when this email is created + datetime_cs integer, + -- other data or parameters to associate with email + other text +); + +create index acs_mail_lite_send_msg_id_map_msg_id_idx + on acs_mail_lite_send_msg_id_map (msg_id); +