//Cleans editor HTML: loads it into a temporary DOM element, deletes unsupported tags,
// strips dis-allowed attributes/styles, then normalizes the resulting markup text.
//Parameters:
//  content                        HTML text to clean.
//  tagsDeletedCompletely          Tag names handed to pjsRemoveElementsByTag (removed with their content).
//  tagsKeptWithAttributes         Passed through to pjsCleanNode.
//  allowedAttributeList           Passed through to pjsCleanNode.
//  tagStylesKept                  Passed through to pjsCleanNode.
//  tagsKeptRemovingAllAttributes  Passed through to pjsCleanNode.
//  replaceBrWithP                 When truthy, every <br> in the result is replaced with an (unclosed) <p>.
//Returns the cleaned HTML text. If the DOM pass throws, the user is alerted and the
// original content is used instead (it still goes through the text normalization below).
function pjsCleanContent(
        content
        , tagsDeletedCompletely
        , tagsKeptWithAttributes
        , allowedAttributeList
        , tagStylesKept
        , tagsKeptRemovingAllAttributes
        , replaceBrWithP
        ) {
    var textOut;
    try {
        //Move our editor text into the DOM, eliminating empty paragraphs.
        var TOP_LEVEL_ELEMENT_TAG = "DIV_TOP_ELEMENT";
        var dom = document.createElement(TOP_LEVEL_ELEMENT_TAG);
        //Do not set dom.innerHTML as in IE this would cause an image's src or a link's href
        // to be converted to absolute path. This is a severe IE quirk (per a Telerik example).
        Telerik.Web.UI.Editor.Utils.setElementInnerHtml(dom, content);

        //Remove the unsupported tags we delete completely
        pjsRemoveElementsByTag(dom, tagsDeletedCompletely);

        //Process remaining elements, applying the specified transformations
        pjsCleanNode(
        TOP_LEVEL_ELEMENT_TAG
        , dom
        , tagsKeptWithAttributes
        , allowedAttributeList
        , tagStylesKept
        , tagsKeptRemovingAllAttributes
        );

        textOut = dom.innerHTML;
    }
    catch (e) {
        //If we generated an error, return the original content.
        //e.description is IE-only; fall back to the standard e.message elsewhere.
        alert("A problem occured while verifying the HTML formatting. "
            + "Please check your text and contact support if the problem continues.\r\n"
            + "Error #" + (e.number & 0xFFFF) + ": " + (e.description || e.message));
        textOut = content;
    }
    //Remove html comments which appear when pasting from Word into FF3.
    //Replace non-blanking space with a regular space.
    //Replace multiple white spaces with a single space, which also removes CrLf.
    //Remove empty paragraph tags since the editor sometimes inserts them.
    //Trim the final string.
    try {
        textOut = textOut
         //Lazy quantifier: each comment is matched on its own, so real content
         // sitting between two comments is preserved.
         .replace(/<!--[\s\S]*?-->/g, "").replace(/&lt;!--[\s\S]*?--&gt;/g, "")
         .replace(/&nbsp;/g, " ")
         .replace(/\u00A0/g, " ")
         .replace(/\s+/g, " ")
         .replace(/<p>\s*<\/p>/gi, "")
         .replace(/^\s\s*/, '').replace(/\s\s*$/, '');

        //This is not very good practice since we have unclosed <p> tags, _
        // but it accomplishes the goal for working with Word and web display, _
        // and making the change correctly is more complicated.
        if (replaceBrWithP) {
            textOut = textOut.replace(/<br\s*\/?>/gi, "<p>");
        }
    }
    catch (e) {
        //Deliberately best-effort: if normalization fails (e.g. textOut is not a string),
        // return whatever text we have rather than interrupting the caller.
    }

    //Display results when debugging
    if (false) {
        var chunkSize = 1500;
        var chunkCount = textOut.length / chunkSize;
        for (var i = 0; i < chunkCount; i++) {
            //Chunks start at offset 0 so the first character is not dropped.
            alert("HTML Filter Part " + i + ": " + textOut.substr(i * chunkSize, chunkSize));
        }
    }

    return textOut;
}

//Deletes every element under dom whose tag name appears in tagsToBeDeleted.
//  dom              Node exposing getElementsByTagName.
//  tagsToBeDeleted  Indexable list of tag names; each one is looked up in turn.
//Matches are removed from the last one to the first. A match that cannot be
// removed (for instance because it no longer has a parent) is silently skipped.
function pjsRemoveElementsByTag(dom, tagsToBeDeleted) {
    var tagIndex = 0;
    while (tagIndex < tagsToBeDeleted.length) {
        var matches = dom.getElementsByTagName(tagsToBeDeleted[tagIndex]);
        var position = matches.length;
        //Walk backwards so removals never disturb the positions still to be visited.
        while (position-- > 0) {
            try {
                var doomed = matches[position];
                doomed.parentNode.removeChild(doomed);
            }
            catch (ignored) {
                //The element may already have gone away with a removed ancestor.
            }
        }
        tagIndex++;
    }
}

//Recursively cleans one DOM node and everything beneath it.
//Parameters:
//  TOP_LEVEL_ELEMENT_TAG          Tag name of the temporary wrapper element, which is never altered itself.
//  node                           The node to clean.
//  tagsKeptWithAttributes         Searched with indexOf for "|TAG|" (upper case); matching elements keep
//                                 their allowed attributes and styles.
//  allowedAttributeList           Attribute allow-list handed to pjsRemoveElementAttributes.
//  tagStylesKept                  Map from upper-case tag name to the styles kept for that tag.
//  tagsKeptRemovingAllAttributes  Searched with indexOf for "|TAG|"; matching elements keep the tag only.
//Element children are cleaned before the element itself. Elements matching neither list are
// unwrapped (children moved up, tag removed). Nodes with nodeType >= 4 (comments, CDATA, ...) are removed.
//Returns true when this node was removed from its parent, otherwise false.
function pjsCleanNode(
    TOP_LEVEL_ELEMENT_TAG
    , node
    , tagsKeptWithAttributes
    , allowedAttributeList
    , tagStylesKept
    , tagsKeptRemovingAllAttributes
    ) {
    var fNodeDeleted = false;   //Return value- will be True if this node is deleted.
    //What kind of node are we processing?
    //Elements
    if (node.nodeType == 1) {
        //Use recursion to clean child nodes before processing this node.
        //hasChildNodes is a method and must be called; the bare reference is always truthy.
        if (node.hasChildNodes()) {
            var childList = node.childNodes;
            for (var i = 0; i < childList.length; i++) {
                var child = childList[i];
                //Text nodes (type=3) do not need any cleaning.
                if (child.nodeType != 3) {
                    var fChildDeleted = pjsCleanNode(
                     TOP_LEVEL_ELEMENT_TAG
                     , child
                     , tagsKeptWithAttributes
                     , allowedAttributeList
                     , tagStylesKept
                     , tagsKeptRemovingAllAttributes
                    );
                    if (fChildDeleted) {
                        if (node.hasChildNodes()) {
                            //The child list shifted down by one, so revisit the same index.
                            i--;
                        }
                        else {
                            break;
                        }
                    }
                }
            }
        }

        var tag = node.tagName.toUpperCase();
        var tagSearch = "|" + tag + "|";

        if (tag == TOP_LEVEL_ELEMENT_TAG) {
            //Ignore the temporary element which contains the rest of the markup.
        }
        //Tags we keep with specified attributes
        else if (tagsKeptWithAttributes.indexOf(tagSearch) >= 0) {
            //Remove unsupported attributes and style properties
            var stylesToKeep = tagStylesKept[tag];
            if (stylesToKeep == undefined) stylesToKeep = "";
            pjsRemoveElementAttributes(node, allowedAttributeList, stylesToKeep);
            //Remove an empty SPAN element
            if (tag == "SPAN") {
                fNodeDeleted = pjsRemoveElementIfNoAllowedAttributes(node, allowedAttributeList);
            }
        }
        //Tags we keep while removing all attributes.
        else if (tagsKeptRemovingAllAttributes.indexOf(tagSearch) >= 0) {
            pjsRemoveElementAttributes(node, "", "");
        }
        //Tags for which we keep any text and other child elements
        // within the element but remove the tag itself.
        //NOTE: This is the default behavior if we haven't specified anything else.
        else {
            pjsMoveChildElementsUpTheTreeAndRemoveElement(node, tag);
            fNodeDeleted = true;
        }
    }
    //Attributes & text elements (nodeType == 2 & 3) are left untouched here.
    //Delete other node types: CDATA, Entity Reference, Entity, Processing Instruction,
    // Comment, Document, Doc Type, Doc Fragment, Notation
    else if (node.nodeType >= 4) {
        //Remove any of these node types
        node.parentNode.removeChild(node);
        fNodeDeleted = true;
    }
    return fNodeDeleted;
}

//Removes an element's tag while keeping what it contained.
//  element  The element to remove; it must have a parent node.
//  tag      The element's tag name. "A" selects the anchor handling below.
//Anchors are replaced by a single text node built by pjsGetAnchorElementTextToKeep.
//For any other tag, each child node is moved in front of the element (order preserved)
// and the now-empty element is removed.
function pjsMoveChildElementsUpTheTreeAndRemoveElement(element, tag) {
    //IE (6 & 7) sometimes trims text elements. A trailing space is wanted when the
    // node that follows is a text node not starting with a space or a comma.
    function needsTrailingSpace(nextSib) {
        if (nextSib == null || nextSib.nodeType != 3) {
            return false;
        }
        var nextTextStart = nextSib.nodeValue.charAt(0);
        return !(nextTextStart == " " || nextTextStart == ",");
    }

    //We replace anchor tag with text dependent on both displayed text and the href.
    if (tag == "A") {
        var linkText = pjsGetAnchorElementTextToKeep(element);
        if (needsTrailingSpace(element.nextSibling)) {
            linkText += " ";
        }
        element.parentNode.replaceChild(document.createTextNode(linkText), element);
        return;
    }

    //Child nodes include any text that's part of the element.
    //hasChildNodes is a method and must be called; the bare reference is always truthy.
    if (element.hasChildNodes()) {
        //Don't index into the childNodes collection because it changes as we process nodes;
        // always take the current first child instead.
        var child = element.firstChild;
        while (child != null) {
            if (child.nodeType == 3) {
                //For the last child, the text that follows is the element's own next sibling;
                // otherwise it is the child's next sibling.
                var nextSib = (child == element.lastChild) ? element.nextSibling : child.nextSibling;
                if (needsTrailingSpace(nextSib)) {
                    child.nodeValue += " ";
                }
            }
            element.parentNode.insertBefore(child, element);
            child = element.firstChild;
        }
    }
    element.parentNode.removeChild(element);
}

//Strips every user-specified attribute of elem that is not on the allow-list.
//  elem                  Element whose attributes collection is scanned (last to first).
//  allowedAttributeList  Searched with indexOf for "|NAME|" (upper case); an empty value
//                        means no attribute is allowed at all.
//  stylesToKeep          Handed to pjsCleanStyleSettings when an allowed STYLE attribute is found
//                        and stylesToKeep.length is greater than zero.
//NOTE(review): a plain object map has no length property, in which case the style
// clean-up below is skipped - confirm what callers actually pass here.
function pjsRemoveElementAttributes(elem, allowedAttributeList, stylesToKeep) {
    var anyAttributeAllowed = (allowedAttributeList.length > 0);
    var attributeNodes = elem.attributes;
    var index = attributeNodes.length;
    while (index-- > 0) {
        var current = attributeNodes[index];
        //IE lists every possible attribute on a tag (~80), so only look at
        // the ones that were actually given a value.
        if (!current.specified) {
            continue;
        }
        //The name keeps its original case because the removal code needs it that way.
        var originalName = current.name;
        var isAllowed =
            anyAttributeAllowed
            && allowedAttributeList.indexOf("|" + originalName.toUpperCase() + "|") >= 0;
        if (!isAllowed) {
            pjsRemoveAttribute(elem, current);
            continue;
        }
        if (originalName.toUpperCase() == "STYLE" && stylesToKeep.length > 0) {
            //Allowed style attribute: filter its individual settings.
            pjsCleanStyleSettings(elem, current, stylesToKeep);
        }
    }
}

//Removes one attribute from an element, working around browser quirks.
//  element    The element that owns the attribute.
//  attribute  The attribute node to remove; its name is used in its original case.
//The value is blanked before removal because some attributes (like Class) do not
// appear to get removed unless the value is "". If blanking with "" throws, null is tried.
function pjsRemoveAttribute(element, attribute) {
    var name = attribute.name;

    //Overwrites the attribute value through both the element and the attribute node.
    function overwriteValue(blank) {
        element.setAttribute(name, blank);
        attribute.nodeValue = blank;
    }

    try {
        if (name.toLowerCase() == "class") {
            //IE7 requires clearing className as well as class.
            element.className = "";
        }
        overwriteValue("");
    }
    catch (blankError) {
        //Some attribute values throw on "", maybe depending on value type; try null instead.
        try {
            overwriteValue(null);
        }
        catch (nullError) { }
    }

    element.removeAttribute(name);
    if (name.toLowerCase() != "class") {
        return;
    }
    try {
        //This is needed for IE7 to actually remove the class attribute.
        // Without this line, IE7 leaves the empty attribute: class="".
        element.removeAttribute("className");
    }
    catch (ignored) { }
}

//Removes dis-allowed settings from an element's inline style.
//  element       Element whose style.cssText is examined.
//  attribute     The element's style attribute node; removed via pjsRemoveAttribute when
//                no allowed setting is found or a dis-allowed setting cannot be cleared.
//  stylesToKeep  Map of lower-case css property name to "|acceptable|value|list|string|".
//Each "name: value" declaration is lower-cased and trimmed, then either accepted
// (value found in the property's list) or cleared by assigning "" to the matching
// camelCase style property (e.g. margin-top -> marginTop).
function pjsCleanStyleSettings(element, attribute, stylesToKeep) {
    //RegEx trim, usable in browsers without String.prototype.trim.
    function trimText(text) {
        return text.replace(/^\s\s*/, '').replace(/\s\s*$/, '');
    }

    var thisStyle = element.style;
    //Initialize to NOT keeping the style attribute. We set this false if we find any valid setting.
    var fDeleteStyleAttribute = true;
    var cssText = thisStyle.cssText;

    var cssSettings = cssText.split(";");
    for (var i = 0; i < cssSettings.length; i++) {
        //Get and trim the next setting
        var settingText = trimText(cssSettings[i].toLowerCase());
        //Split at the FIRST colon only, so values such as url(http://...) stay intact.
        var colonAt = settingText.indexOf(":");
        if (colonAt < 0) {
            //Empty fragment or a fragment without a value: nothing to validate or clear.
            continue;
        }
        var propName = trimText(settingText.substring(0, colonAt));
        var propValue = trimText(settingText.substring(colonAt + 1));
        //Is this an allowed property?
        var propAllowedValueString = stylesToKeep[propName];
        //Is this an allowed setting? The indexOf check guards against inherited
        // members (e.g. "constructor") that are not value lists.
        var allowedSetting =
            propValue.length > 0
            && propAllowedValueString != null
            && typeof propAllowedValueString.indexOf == "function"
            && propAllowedValueString.indexOf("|" + propValue + "|") >= 0;
        if (allowedSetting) {
            fDeleteStyleAttribute = false;
            continue;
        }

        //Remove this setting. First convert propName to "standard" css property name.
        //e.g., top-margin-width becomes topMarginWidth
        var propParts = propName.split("-");
        var propertyName = propParts[0];
        for (var j = 1; j < propParts.length; j++) {
            var partNext = propParts[j];
            propertyName += partNext.charAt(0).toUpperCase() + partNext.substring(1, partNext.length);
        }
        //Grouped properties, like 'font', cannot be set (at least in IE7) and generate an error.
        try {
            thisStyle[propertyName] = "";
        }
        catch (e) {
            //Remove the whole style attribute if we can't clear a dis-allowed setting.
            fDeleteStyleAttribute = true;
            break;
        }
    }
    //If none of the style properties were valid, remove the style attribute.
    if (fDeleteStyleAttribute) {
        pjsRemoveAttribute(element, attribute);
    }
}

//Unwraps an element that carries none of the allowed attributes.
//For now only SPAN elements are sent here, since an attribute-less SPAN is redundant.
//  element               The element to inspect.
//  allowedAttributeList  Delimited list such as "|ALIGN|STYLE|"; the outer delimiters are
//                        dropped and the names are looked up in lower case.
//Returns true when the element was removed (its children are moved up by
// pjsMoveChildElementsUpTheTreeAndRemoveElement), false when it was kept.
function pjsRemoveElementIfNoAllowedAttributes(element, allowedAttributeList) {
    var innerList = allowedAttributeList.substring(1, allowedAttributeList.length - 1);
    var candidateNames = innerList.toLowerCase().split("|");

    //Stop looking as soon as one allowed attribute is found with a specified value.
    var hasAllowedAttribute = false;
    var index = 0;
    while (!hasAllowedAttribute && index < candidateNames.length) {
        var attributeNode = element.getAttributeNode(candidateNames[index]);
        hasAllowedAttribute = (attributeNode != null && attributeNode.specified);
        index++;
    }

    if (hasAllowedAttribute) {
        return false;
    }
    pjsMoveChildElementsUpTheTreeAndRemoveElement(element, element.tagName);
    return true;
}

//Builds the plain text that replaces an anchor element when its tag is removed.
//  elem  The anchor element.
//Returns the anchor's displayed text (all text nodes beneath it, in document order,
// separated by spaces and trimmed). When the href differs from the displayed text it is
// appended as " [href]". Anchors whose href starts with "javascript" (any case) yield "".
function pjsGetAnchorElementTextToKeep(elem) {
    //RegEx trim, matching the rest of this file (no reliance on String.prototype.trim).
    function trimText(text) {
        return text.replace(/^\s\s*/, '').replace(/\s\s*$/, '');
    }

    //Gathers text from text nodes, descending into child elements so that text
    // inside kept formatting tags (e.g. <a><b>Click</b></a>) is not lost.
    function collectText(node) {
        var text = "";
        var children = node.childNodes;
        for (var i = 0; children && i < children.length; i++) {
            var child = children[i];
            if (child.nodeType == 3) {
                text += child.nodeValue + " ";
            }
            else if (child.nodeType == 1) {
                text += collectText(child);
            }
        }
        return text;
    }

    var newText = trimText(collectText(elem));

    //There can be an A link without an href, in which case, we just keep the displayed text.
    var linkHref = elem.getAttribute("href");
    if (linkHref != null) {
        linkHref = trimText(linkHref);
        //Mark an Anchor tag with scripting for removal. URL schemes are case-insensitive.
        if (linkHref.toLowerCase().indexOf("javascript") == 0) {
            newText = "";
        }
        //Combine href with displayed text if they are different, since we will remove the tag.
        else if (
                 linkHref.length > 0
                 && newText != linkHref
                 && "http://" + newText != linkHref
                 && "http://" + newText + "/" != linkHref
                 && newText != linkHref + "/"
                 ) {
            newText = newText + " [" + linkHref + "]";
        }
    }
    return newText;
}
