Index: openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl
===================================================================
RCS file: /usr/local/cvsroot/openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl,v
diff -u -r1.11 -r1.12
--- openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl 27 Jan 2003 10:30:01 -0000 1.11
+++ openacs-4/packages/acs-tcl/tcl/text-html-procs.tcl 27 Jan 2003 13:49:44 -0000 1.12
@@ -16,12 +16,17 @@
ad_proc -public ad_text_to_html {
-no_links:boolean
+ -no_lines:boolean
+ -no_quote:boolean
text
} {
Converts plaintext to html. Also translates any recognized
email addresses or URLs into a hyperlink.
@param no_links will prevent it from highlighting
+ @param no_quote will prevent it from HTML-quoting output, so this can be run on
+ semi-HTML input and preserve that formatting. This will also cause spaces/tabs to not be
+ replaced with nbsp's, because this can too easily mess up HTML tags.
@author Branimir Dolicki (branimir@arsdigita.com)
@author Lars Pind (lars@pinds.com)
@@ -39,11 +44,14 @@
# (bd) The only purpose of these sTaRtUrL and
# eNdUrL markers is to get rid of trailing dots,
# commas and things like that. Note that there
- # is a TAB before and after each marker.
+ # is a \x001 special char before and after each marker.
- regsub -nocase -all {([^a-zA-Z0-9]+)(http://[^\(\)"<>\s]+)} $text "\\1\tsTaRtUrL\\2eNdUrL\t" text
- regsub -nocase -all {([^a-zA-Z0-9]+)(https://[^\(\)"<>\s]+)} $text "\\1\tsTaRtUrL\\2eNdUrL\t" text
- regsub -nocase -all {([^a-zA-Z0-9]+)(ftp://[^\(\)"<>\s]+)} $text "\\1\tsTaRtUrL\\2eNdUrL\t" text
+ regsub -nocase -all {([^a-zA-Z0-9]+)(http://[^\(\)"<>\s]+)} $text "\\1\x001sTaRtUrL\\2eNdUrL\x001" text
+ regsub -nocase -all {([^a-zA-Z0-9]+)(https://[^\(\)"<>\s]+)} $text "\\1\x001sTaRtUrL\\2eNdUrL\x001" text
+ regsub -nocase -all {([^a-zA-Z0-9]+)(ftp://[^\(\)"<>\s]+)} $text "\\1\x001sTaRtUrL\\2eNdUrL\x001" text
+
+ # Don't dress URLs that are already links
+ regsub -nocase -all {(href\s*=\s*['"]?)\x001sTaRtUrL([^\x001]*)eNdUrL\x001} $text {\1\2} text
# email links have the form xxx@xxx.xxx
# JCD: don't treat things =xxx@xxx.xxx as email since most
@@ -52,50 +60,81 @@
# work correctly). It's all quite ugly.
regsub -nocase -all {([^a-zA-Z0-9=]+)(mailto:)?([^=\(\)\s:;,@<>]+@[^\(\)\s.:;,@<>]+[.][^\(\)\s:;,@<>]+)} $text \
- "\\1\tsTaRtEmAiL\\3eNdEmAiL\t" text
-
-
+ "\\1\x001sTaRtEmAiL\\3eNdEmAiL\x001" text
}
# At this point, before inserting some of our own <, >, and "'s
# we quote the ones entered by the user:
- set text [ad_quotehtml $text]
+ if { !$no_quote_p } {
+ set text [ad_quotehtml $text]
+ }
- # Convert _single_ CRLF's to <br>'s to preserve line breaks
- regsub -all {\r*\n} $text "
\n" text
+ # Convert line breaks
+ if { !$no_lines_p } {
+ set text [util_convert_line_breaks_to_html $text]
+ }
- # Convert every two spaces to an nbsp
- regsub -all { } $text "\\\ " text
-
- # turn CRLFCRLF into <p>
- if { [regsub -all {\r\n\s*\r\n} $text "
" text] == 0 } { - # try LFLF - if { [regsub -all {\n\s*\n} $text "
" text] == 0 } { - # try CRCR - regsub -all {\r\s*\r} $text "
" text - } + if { !$no_quote_p } { + # Convert every two spaces to an nbsp + regsub -all { } $text "\\\ " text + + # Convert tabs to four nbsp's + regsub -all {\t} $text {\ \ \ \ } text } - + if { !$no_links_p } { + # Move the end of the link before any punctuation marks at the end of the URL + regsub -all {([]!?.:;,<>\(\)\}"'-]+)(eNdUrL\x001)} $text {\2\1} text + regsub -all {([]!?.:;,<>\(\)\}"'-]+)(eNdEmAiL\x001)} $text {\2\1} text + # Dress the links and emails with A HREF - regsub -all {([]!?.:;,<>\(\)\}"'-]+)(eNdUrL\t)} $text {\2\1} text - regsub -all {([]!?.:;,<>\(\)\}"'-]+)(eNdEmAiL\t)} $text {\2\1} text - regsub -all {\tsTaRtUrL([^\t]*)eNdUrL\t} $text {\1} text - regsub -all {\tsTaRtEmAiL([^\t]*)eNdEmAiL\t} $text {\1} text + regsub -all {\x001sTaRtUrL([^\x001]*)eNdUrL\x001} $text {\1} text + regsub -all {\x001sTaRtEmAiL([^\x001]*)eNdEmAiL\x001} $text {\1} text set text [string trimleft $text] } - # Convert every tab to 4 nbsp's - regsub -all {\t} $text {\ \ \ \ } text - # JCD: Remove all the eNd sTaRt stuff and warn if we do it since its bad # to have these left (means something is broken in our regexps above) - if {[regsub -all {(sTaRtUrL|eNdUrL|sTaRtEmAiL|eNdEmAiL)} $text {} text]} { + if {[regsub -all {(\x001sTaRtUrL|eNdUrL\x001|\x001sTaRtEmAiL|eNdEmAiL\x001)} $text {} text]} { ns_log warning "Replaced sTaRt/eNd magic tags in ad_text_to_html" } + return $text } +ad_proc -public util_convert_line_breaks_to_html { + text +} { + Convert line breaks to
and
tags, respectively.
+} {
+ # Remove any leading or trailing whitespace
+ regsub {^[\s]*} $text {} text
+ regsub {[\s]*$} $text {} text
+
+ # Make sure all line breaks are single \n's
+ regsub -all {\r\n} $text "\n" text
+ regsub -all {\r} $text "\n" text
+
+ # Remove whitespace around \n's
+ regsub -all {\s+\n\s+} $text "\n" text
+
+ # Wrap P's around paragraphs
+ set text "
$text
" + regsub -all {([^\n\s])\n\n([^\n\s])} $text {\1\2} text
+
+ # Convert _single_ CRLF's to <br>'s to preserve line breaks
+ # Lars: This must be done after we've made P tags, because otherwise the line
+ # breaks will already have been converted into BR's.
+ regsub -all {\n} $text "
\n" text
+
+ # Add line breaks to P tags
+ regsub -all {
[ad_text_to_html -no_lines -- $text]" } - text/plain - - text { - return [wrap_string $text 70] + text/plain { + set text [wrap_string $text 70] } - default { - return -code error "Can only convert to text or html" + } + } + text/html { + switch $to { + text/html { + set text [util_close_html_tags $text] } + text/plain { + set text [ad_html_to_text -- $text] + } } } - default { - return -code error "Can only convert from text or html" - } } + + ns_log Notice "LARS: Converted from $from to $to, result=$text" + + return $text } +ad_proc -public ad_enhanced_text_to_html { + text +} { + Converts enhanced text format to normal HTML. + @author Lars Pind (lars@pinds.com) + @creation-date 2003-01-27 +} { + return [ad_text_to_html -no_quote -- [util_close_html_tags $text]] +} + +ad_proc -public ad_enhanced_text_to_plain_text { + text +} { + Converts enhanced text format to normal plaintext format. + @author Lars Pind (lars@pinds.com) + @creation-date 2003-01-27 +} { + # Convert the HTML version to plaintext. + return [ad_html_to_text -- [ad_enhanced_text_to_html -- $text]] +} + + + ad_proc -public ad_convert_to_html { {-html_p f} text