Index: openacs-4/packages/acs-templating/www/resources/xinha-nightly/plugins/GetHtml/get-html.js
===================================================================
RCS file: /usr/local/cvsroot/openacs-4/packages/acs-templating/www/resources/xinha-nightly/plugins/GetHtml/get-html.js,v
diff -u -r1.2 -r1.2.2.1
--- openacs-4/packages/acs-templating/www/resources/xinha-nightly/plugins/GetHtml/get-html.js	11 Nov 2005 20:32:43 -0000	1.2
+++ openacs-4/packages/acs-templating/www/resources/xinha-nightly/plugins/GetHtml/get-html.js	1 Jan 2006 23:28:22 -0000	1.2.2.1
@@ -35,7 +35,8 @@
 /*01*/  new RegExp().compile(/(\S*\s*=\s*)?_moz[^=>]*(=\s*[^>]*)?/gi),//strip _moz attributes
 /*02*/  new RegExp().compile(/\s*=\s*(([^'"][^>\s]*)([>\s])|"([^"]+)"|'([^']+)')/g),// find attributes
 /*03*/  new RegExp().compile(/\/>/g),//strip singlet terminators
-/*04*/  new RegExp().compile(/<(br|hr|img|input|link|meta|param|embed)([^>]*)>/g),//terminate singlet tags
+/*04*/ // new RegExp().compile(/<(br|hr|img|input|link|meta|param|embed)([^>]*)>/g),//terminate singlet tags
+/*04*/  new RegExp().compile(/<(br|hr|img|input|link|meta|param|embed|area)((\s*\S*="[^"]*")*)>/g),//terminate singlet tags
 /*05*/  new RegExp().compile(/(checked|compact|declare|defer|disabled|ismap|multiple|no(href|resize|shade|wrap)|readonly|selected)([\s>])/gi),//expand singlet attributes
 /*06*/  new RegExp().compile(/(="[^']*)'([^'"]*")/),//check quote nesting
 /*07*/  new RegExp().compile(/&(?=[^<]*>)/g),//expand query ampersands
@@ -45,13 +46,14 @@
 /*11*/  new RegExp().compile(/\s+([^=\s]+)(="[^"]+")/g),// lowercase attribute names
 /*12*/  new RegExp().compile(/(\S*\s*=\s*)?contenteditable[^=>]*(=\s*[^>\s\/]*)?/gi),//strip contenteditable
 /*13*/  new RegExp().compile(/((href|src)=")([^\s]*)"/g), //find href and src for stripBaseHref()
-/*14*/  new RegExp().compile(/<\/?(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|br|hr|img|embed|param|pre|script|html|head|body|meta|link|title)[^>]*>/g),
+/*14*/  new RegExp().compile(/<\/?(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|br|hr|img|embed|param|pre|script|html|head|body|meta|link|title|area)[^>]*>/g),
 /*15*/  new RegExp().compile(/<\/(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|html|head|body|script)( [^>]*)?>/g),//blocklevel closing tag
 /*16*/  new RegExp().compile(/<(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|html|head|body|script)( [^>]*)?>/g),//blocklevel opening tag
-/*17*/  new RegExp().compile(/<(br|hr|img|embed|param|pre|meta|link|title)[^>]*>/g),//singlet tag
-/*18*/  new RegExp().compile(/(^|<\/(pre|script)>)(\s|[^\s])*?(<(pre|script)[^>]*>|$)/g),//exclude content between pre and script tags
+/*17*/  new RegExp().compile(/<(br|hr|img|embed|param|pre|meta|link|title|area)[^>]*>/g),//singlet tag
+/*18*/  new RegExp().compile(/(^|<\/(pre|script)>)(\s|[^\s])*?(<(pre|script)[^>]*>|$)/g),//find content NOT inside pre and script tags
 /*19*/  new RegExp().compile(/(<pre[^>]*>)(\s|[^\s])*?(<\/pre>)/g),//find content inside pre tags
-/*20*/  new RegExp().compile(/(^|<!--(\s|\S)*?-->)((\s|\S)*?)(?=<!--(\s|\S)*?-->|$)/g)//exclude comments
+/*20*/  new RegExp().compile(/(^|<!--(\s|\S)*?-->)((\s|\S)*?)(?=<!--(\s|\S)*?-->|$)/g),//find content NOT inside comments
+/*21*/  new RegExp().compile(/\S*=""/g) //find empty attributes
 ];
 
 /** 
@@ -64,11 +66,12 @@
 		replace(c[1], ' ').//strip _moz attributes
 		replace(c[12], ' ').//strip contenteditable
 		replace(c[2], '="$2$4$5"$3').//add attribute quotes
+		replace(c[21], ' ').//strip empty attributes
 		replace(c[11], function(str, p1, p2) { return ' '+p1.toLowerCase()+p2; }).//lowercase attribute names
 		replace(c[3], '>').//strip singlet terminators
 		replace(c[9], '$1>').//trim whitespace
-		replace(c[4], '<$1$2 />').//terminate singlet tags
 		replace(c[5], '$1="$1"$3').//expand singlet attributes
+		replace(c[4], '<$1$2 />').//terminate singlet tags
 		replace(c[6], '$1$2').//check quote nesting
 	//	replace(c[7], '&amp;').//expand query ampersands
 		replace(c[8], '<').//strip tagstart whitespace
@@ -93,9 +96,9 @@
 	if(HTMLArea.is_gecko) { //moz changes returns into <br> inside <pre> tags
 		s = s.replace(c[19], function(str){return str.replace(/<br \/>/g,"\n")});
 	}
-	s = s.replace(c[20], function(st,$1,$2,$3) { //exclude comments
-	  strn = $3.replace(c[18], function(string) { //skip pre and script tags
-		string = string.replace(/[\n\r]/gi, " ").replace(/\s+/gi," ").replace(c[14], function(str) {
+	s = s.replace(c[18], function(strn) { //reformat only the text found outside pre and script tags
+	  strn = strn.replace(c[20], function(st,$1,$2,$3) { //$1 is the leading comment (kept verbatim), $3 the text that follows it
+		var string = $3.replace(/[\n\r]/gi, " ").replace(/\s+/gi," ").replace(c[14], function(str) { //var keeps 'string' local instead of leaking an implicit global
 			if (str.match(c[16])) {
 				var s = "\n" + HTMLArea.__sindent + str;
 				// blocklevel openingtag - increase indent
@@ -116,8 +119,8 @@
 			}
 			return str; // this won't actually happen
 		});
-		return string;
-      });return $1 + strn;
+		return $1 + string;
+	  });return strn;
     });
     if (s.charAt(0) == "\n") {
         return s.substring(1, s.length);
@@ -159,13 +162,14 @@
 		if(root_tag == "html") {
 			innerhtml = editor._doc.documentElement.innerHTML;
 		} else {
-			innerhtml = editor._doc.body.innerHTML;
+			innerhtml = root.innerHTML;
 		}
-		//pass tags to cleanHTML() one at a time; ignore comments and php tags
+		//pass tags to cleanHTML() one at a time
 		//includes support for htmlRemoveTags config option
-		html += innerhtml.replace(/<[^\?!][^>]*>/gi, function(tag){
-			if(!(editor.config.htmlRemoveTags && editor.config.htmlRemoveTags.test(tag.replace(/<([^\s>\/]+)/,'$1'))))
-			return editor.cleanHTML(tag);
+		html += innerhtml.replace(/<(?:<[^>]*>|[^<>])*>/gi, function(tag){ //one tag per call; single-level alternation avoids exponential backtracking on an unclosed '<'
+			if(/^<[!\?]/.test(tag)) return tag; //pass comments, doctype-style and php tags through untouched
+			else if(!(editor.config.htmlRemoveTags && editor.config.htmlRemoveTags.test(tag.replace(/<([^\s>\/]+)/,'$1'))))
+				return editor.cleanHTML(tag); //not matched by htmlRemoveTags: clean it
 			else return ''});
 		//IE drops  all </li> tags in a list except the last one
 		if(HTMLArea.is_ie) {
@@ -174,7 +178,8 @@
 				replace(/<\/li>([\s\n]*<\/li>)+/g, '<\/li>');
 		}
 		if(HTMLArea.is_gecko)
-			html = html.replace(/(.*)<br \/>\n$/, '$1'); //strip trailing <br> added by moz
+			html = html.replace(/(.*)<br \/>\n$/, '$1'). //strip trailing <br> added by moz
+				replace(/^\n(.*)/, '$1'); //strip leading newline added by moz
 		if (outputRoot) {
 			html += "</" + root_tag + ">";
 		}