Index: openacs-4/packages/acs-templating/www/resources/xinha-nightly/plugins/GetHtml/get-html.js =================================================================== RCS file: /usr/local/cvsroot/openacs-4/packages/acs-templating/www/resources/xinha-nightly/plugins/GetHtml/get-html.js,v diff -u -r1.2 -r1.2.2.1 --- openacs-4/packages/acs-templating/www/resources/xinha-nightly/plugins/GetHtml/get-html.js 11 Nov 2005 20:32:43 -0000 1.2 +++ openacs-4/packages/acs-templating/www/resources/xinha-nightly/plugins/GetHtml/get-html.js 1 Jan 2006 23:28:22 -0000 1.2.2.1 @@ -35,7 +35,8 @@ /*01*/ new RegExp().compile(/(\S*\s*=\s*)?_moz[^=>]*(=\s*[^>]*)?/gi),//strip _moz attributes /*02*/ new RegExp().compile(/\s*=\s*(([^'"][^>\s]*)([>\s])|"([^"]+)"|'([^']+)')/g),// find attributes /*03*/ new RegExp().compile(/\/>/g),//strip singlet terminators -/*04*/ new RegExp().compile(/<(br|hr|img|input|link|meta|param|embed)([^>]*)>/g),//terminate singlet tags +/*04*/ // new RegExp().compile(/<(br|hr|img|input|link|meta|param|embed)([^>]*)>/g),//terminate singlet tags +/*04*/ new RegExp().compile(/<(br|hr|img|input|link|meta|param|embed|area)((\s*\S*="[^"]*")*)>/g),//terminate singlet tags /*05*/ new RegExp().compile(/(checked|compact|declare|defer|disabled|ismap|multiple|no(href|resize|shade|wrap)|readonly|selected)([\s>])/gi),//expand singlet attributes /*06*/ new RegExp().compile(/(="[^']*)'([^'"]*")/),//check quote nesting /*07*/ new RegExp().compile(/&(?=[^<]*>)/g),//expand query ampersands @@ -45,13 +46,14 @@ /*11*/ new RegExp().compile(/\s+([^=\s]+)(="[^"]+")/g),// lowercase attribute names /*12*/ new RegExp().compile(/(\S*\s*=\s*)?contenteditable[^=>]*(=\s*[^>\s\/]*)?/gi),//strip contenteditable /*13*/ new RegExp().compile(/((href|src)=")([^\s]*)"/g), //find href and src for stripBaseHref() -/*14*/ new RegExp().compile(/<\/?(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|br|hr|img|embed|param|pre|script|html|head|body|meta|link|title)[^>]*>/g), +/*14*/ new 
RegExp().compile(/<\/?(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|br|hr|img|embed|param|pre|script|html|head|body|meta|link|title|area)[^>]*>/g), /*15*/ new RegExp().compile(/<\/(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|html|head|body|script)( [^>]*)?>/g),//blocklevel closing tag /*16*/ new RegExp().compile(/<(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|html|head|body|script)( [^>]*)?>/g),//blocklevel opening tag -/*17*/ new RegExp().compile(/<(br|hr|img|embed|param|pre|meta|link|title)[^>]*>/g),//singlet tag -/*18*/ new RegExp().compile(/(^|<\/(pre|script)>)(\s|[^\s])*?(<(pre|script)[^>]*>|$)/g),//exclude content between pre and script tags +/*17*/ new RegExp().compile(/<(br|hr|img|embed|param|pre|meta|link|title|area)[^>]*>/g),//singlet tag +/*18*/ new RegExp().compile(/(^|<\/(pre|script)>)(\s|[^\s])*?(<(pre|script)[^>]*>|$)/g),//find content NOT inside pre and script tags /*19*/ new RegExp().compile(/(]*>)(\s|[^\s])*?(<\/pre>)/g),//find content inside pre tags -/*20*/ new RegExp().compile(/(^|)((\s|\S)*?)(?=|$)/g)//exclude comments +/*20*/ new RegExp().compile(/(^|)((\s|\S)*?)(?=|$)/g),//find content NOT inside comments +/*21*/ new RegExp().compile(/\S*=""/g) //find empty attributes ]; /** @@ -64,11 +66,12 @@ replace(c[1], ' ').//strip _moz attributes replace(c[12], ' ').//strip contenteditable replace(c[2], '="$2$4$5"$3').//add attribute quotes + replace(c[21], ' ').//strip empty attributes replace(c[11], function(str, p1, p2) { return ' '+p1.toLowerCase()+p2; }).//lowercase attribute names replace(c[3], '>').//strip singlet terminators replace(c[9], '$1>').//trim whitespace - replace(c[4], '<$1$2 />').//terminate singlet tags replace(c[5], '$1="$1"$3').//expand singlet attributes + replace(c[4], '<$1$2 />').//terminate singlet tags replace(c[6], '$1$2').//check quote nesting // replace(c[7], '&').//expand query ampersands replace(c[8], '<').//strip tagstart whitespace @@ -93,9 +96,9 @@ if(HTMLArea.is_gecko) { //moz changes 
returns into <br> inside <pre> tags
 		s = s.replace(c[19], function(str){return str.replace(/
/g,"\n")}); } - s = s.replace(c[20], function(st,$1,$2,$3) { //exclude comments - strn = $3.replace(c[18], function(string) { //skip pre and script tags - string = string.replace(/[\n\r]/gi, " ").replace(/\s+/gi," ").replace(c[14], function(str) { + s = s.replace(c[18], function(strn) { //skip pre and script tags + strn = strn.replace(c[20], function(st,$1,$2,$3) { //exclude comments + string = $3.replace(/[\n\r]/gi, " ").replace(/\s+/gi," ").replace(c[14], function(str) { if (str.match(c[16])) { var s = "\n" + HTMLArea.__sindent + str; // blocklevel openingtag - increase indent @@ -116,8 +119,8 @@ } return str; // this won't actually happen }); - return string; - });return $1 + strn; + return $1 + string; + });return strn; }); if (s.charAt(0) == "\n") { return s.substring(1, s.length); @@ -159,13 +162,14 @@ if(root_tag == "html") { innerhtml = editor._doc.documentElement.innerHTML; } else { - innerhtml = editor._doc.body.innerHTML; + innerhtml = root.innerHTML; } - //pass tags to cleanHTML() one at a time; ignore comments and php tags + //pass tags to cleanHTML() one at a time //includes support for htmlRemoveTags config option - html += innerhtml.replace(/<[^\?!][^>]*>/gi, function(tag){ - if(!(editor.config.htmlRemoveTags && editor.config.htmlRemoveTags.test(tag.replace(/<([^\s>\/]+)/,'$1')))) - return editor.cleanHTML(tag); + html += innerhtml.replace(/<((<[^>]*>)*|[^<>]*)*>/gi, function(tag){ + if(/^<[!\?]/.test(tag)) return tag; //skip comments and php tags + else if(!(editor.config.htmlRemoveTags && editor.config.htmlRemoveTags.test(tag.replace(/<([^\s>\/]+)/,'$1')))) + return editor.cleanHTML(tag); else return ''}); //IE drops all tags in a list except the last one if(HTMLArea.is_ie) { @@ -174,7 +178,8 @@ replace(/<\/li>([\s\n]*<\/li>)+/g, '<\/li>'); } if(HTMLArea.is_gecko) - html = html.replace(/(.*)
\n$/, '$1'); //strip trailing <br> added by moz + html = html.replace(/(.*)
\n$/, '$1'). //strip trailing <br> added by moz + replace(/^\n(.*)/, '$1'); //strip leading newline added by moz if (outputRoot) { html += ""; }