Developer Forum »
Clean up word markup
58 posts

This is a function in the rich text editor in the Webnodes backend, but is it possible to use this function manually in code (preferably C#, not JS)? I need to clean up some markup entered in a third-party editor, and that editor lacks this function.

120 posts

Well, not identical. The one used in the editor is from TinyMCE, but there are Webnodes components that can parse and filter HTML as well. Some examples:

        string html = "some html...";

        // Each HtmlNode below is built from the same markup, paired with one of the
        // predefined HtmlFilter instances whose definitions follow further down.

        // WhiteFilter ends up with FilterTags and FilterAttributes both switched off.
        HtmlNode whiteFiltered = new HtmlNode(html, HtmlFilter.WhiteFilter);
        // YellowFilter allows headings, basic inline formatting, links, images and lists.
        HtmlNode yellowFiltered = new HtmlNode(html, HtmlFilter.YellowFilter);
        // RedFilter is a clone of TextFilterWithBreaks.
        HtmlNode redFiltered = new HtmlNode(html, HtmlFilter.RedFilter);
        // GreenFilter is a clone of YellowFilter with table and extra inline tags added.
        HtmlNode greenFiltered = new HtmlNode(html, HtmlFilter.GreenFilter);
        // BlackFilter is a clone of TextFilterWithLinefeeds.
        HtmlNode blackFiltered = new HtmlNode(html, HtmlFilter.BlackFilter);
        // The three text filters register text replacements for block-level tags:
        // a space, "<br /><br />" and "\r\n\r\n" respectively.
        HtmlNode textOnly = new HtmlNode(html, HtmlFilter.TextFilter);
        HtmlNode textWithBreaks = new HtmlNode(html, HtmlFilter.TextFilterWithBreaks);
        HtmlNode textWithLinefeeds = new HtmlNode(html, HtmlFilter.TextFilterWithLinefeeds);

You can also create your own filters, or traverse the HTML tree using the HtmlNode object. Here is some source code for the various filters. Good luck! :)

/// <summary>
        /// Builds an <c>HtmlNode</c> from <paramref name="html"/>, escapes the non-ASCII
        /// characters of its text literals via the <c>HtmlNode</c> overload, and returns
        /// the node's string form.
        /// </summary>
        /// <param name="html">The markup to process.</param>
        /// <returns>The result of <c>ToString()</c> on the processed node.</returns>
        public static string EscapeNonEnglishHtml(string html) {
            var root = new HtmlNode(html);

            // The overload below rewrites the text literals of the tree in place.
            EscapeNonEnglishHtml(root);

            string escaped = root.ToString();
            return escaped;
        }
        /// <summary>
        /// Walks the children of <paramref name="node"/> recursively. Every child whose
        /// <c>TagType</c> is <c>TagType.TextLiteral</c> has its <c>Text</c> HTML-decoded and
        /// then re-encoded with <c>EscapeNonEnglishText</c>; every other child is descended into.
        /// </summary>
        /// <param name="node">The node whose subtree is rewritten in place.</param>
        public static void EscapeNonEnglishHtml(HtmlNode node) {
            foreach (HtmlNode child in node.Children) {
                if (child.TagType != TagType.TextLiteral) {
                    // Not a text literal: keep descending.
                    EscapeNonEnglishHtml(child);
                    continue;
                }

                // NOTE(review): HtmlDecode also turns entities such as &lt; and &amp; back into
                // raw characters, and only code units above 127 are re-escaped afterwards.
                // Confirm that HtmlNode.Text re-encodes markup characters on output.
                string decoded = HttpUtility.HtmlDecode(child.Text);
                child.Text = EscapeNonEnglishText(decoded);
            }
        }
        /// <summary>
        /// Replaces every character outside the 7-bit ASCII range with a decimal HTML
        /// numeric character reference (<c>&amp;#NNN;</c>). ASCII characters are copied
        /// through unchanged.
        /// </summary>
        /// <param name="text">The text to escape. May be null or empty.</param>
        /// <returns>
        /// The escaped text. A null or empty <paramref name="text"/> is returned as is.
        /// </returns>
        public static string EscapeNonEnglishText(string text) {
            if (string.IsNullOrEmpty(text)) {
                return text;
            }

            StringBuilder sb = new StringBuilder(text.Length);
            for (int i = 0; i < text.Length; i++) {
                char c = text[i];
                if (c <= 127) {
                    sb.Append(c);
                    continue;
                }

                int codePoint;
                if (char.IsSurrogatePair(text, i)) {
                    // Characters outside the BMP occupy two UTF-16 code units. Emit one
                    // reference for the whole code point; a reference to an individual
                    // surrogate is not a valid HTML character reference.
                    codePoint = char.ConvertToUtf32(text, i);
                    i++; // skip the low surrogate that was just consumed
                } else if (char.IsSurrogate(c)) {
                    // Unpaired surrogate: there is no valid reference for it, so emit
                    // U+FFFD REPLACEMENT CHARACTER instead.
                    codePoint = 0xFFFD;
                } else {
                    codePoint = c;
                }

                sb.Append("&#");
                // Invariant culture keeps the digits independent of the thread's culture.
                sb.Append(codePoint.ToString(System.Globalization.CultureInfo.InvariantCulture));
                sb.Append(';');
            }
            return sb.ToString();
        }
        // Cached instance backing TextFilter; created on first access.
        static HtmlFilter _textFilter;

        /// <summary>
        /// Filter with <c>FilterTags</c> and <c>FilterAttributes</c> switched on and
        /// ReplaceTagWithText rules that map h1-h6, br, div and p to a single space.
        /// The instance is created on first access and cached in a static field.
        /// </summary>
        public static HtmlFilter TextFilter {
            get {
                if (_textFilter != null) {
                    return _textFilter;
                }

                HtmlFilter filter = new HtmlFilter { FilterTags = true, FilterAttributes = true };

                // Registered in this order; each of these tags is mapped to one space.
                string[] spacedTags = { "h1", "h2", "h3", "h4", "h5", "h6", "br", "div", "p" };
                foreach (string tag in spacedTags) {
                    filter.AddRule_ReplaceTagWithText(tag, " ");
                }

                _textFilter = filter;
                return _textFilter;
            }
        }
        // Cached instance backing TextFilterWithBreaks; created on first access.
        static HtmlFilter _textFilterWithBreaks;

        /// <summary>
        /// Filter with <c>FilterTags</c> and <c>FilterAttributes</c> switched on that
        /// allows br and registers ReplaceTagWithText rules mapping h1-h6, div, p and hr
        /// to the text <c>&lt;br /&gt;&lt;br /&gt;</c>.
        /// The instance is created on first access and cached in a static field.
        /// </summary>
        public static HtmlFilter TextFilterWithBreaks {
            get {
                if (_textFilterWithBreaks != null) {
                    return _textFilterWithBreaks;
                }

                HtmlFilter filter = new HtmlFilter { FilterTags = true, FilterAttributes = true };

                // br is the only tag this filter explicitly allows.
                filter.AddRule_AllowTag("br", false);

                // Registered in this order; each tag is mapped to a double break.
                string[] blockTags = { "h1", "h2", "h3", "h4", "h5", "h6", "div", "p", "hr" };
                foreach (string tag in blockTags) {
                    filter.AddRule_ReplaceTagWithText(tag, "<br /><br />");
                }

                _textFilterWithBreaks = filter;
                return _textFilterWithBreaks;
            }
        }
        // Cached instance backing TextFilterWithLinefeeds; created on first access.
        static HtmlFilter _textFilterWithLinefeeds;

        /// <summary>
        /// Filter with <c>FilterTags</c> and <c>FilterAttributes</c> switched on that
        /// registers ReplaceTagWithText rules mapping br to one CR/LF and h1-h6, div, p
        /// and hr to two CR/LF sequences.
        /// The instance is created on first access and cached in a static field.
        /// </summary>
        public static HtmlFilter TextFilterWithLinefeeds {
            get {
                if (_textFilterWithLinefeeds != null) {
                    return _textFilterWithLinefeeds;
                }

                HtmlFilter filter = new HtmlFilter { FilterTags = true, FilterAttributes = true };

                // A br maps to a single line break.
                filter.AddRule_ReplaceTagWithText("br", "\r\n");

                // Registered in this order; each tag maps to a blank line.
                string[] blockTags = { "h1", "h2", "h3", "h4", "h5", "h6", "div", "p", "hr" };
                foreach (string tag in blockTags) {
                    filter.AddRule_ReplaceTagWithText(tag, "\r\n\r\n");
                }

                _textFilterWithLinefeeds = filter;
                return _textFilterWithLinefeeds;
            }
        }

        // Cached instance backing BlackFilter; created on first access.
        static HtmlFilter _blackFilter;

        /// <summary>
        /// A clone of <c>TextFilterWithLinefeeds</c>, created on first access and cached
        /// in a static field.
        /// </summary>
        public static HtmlFilter BlackFilter {
            get {
                if (_blackFilter != null) {
                    return _blackFilter;
                }

                _blackFilter = TextFilterWithLinefeeds.Clone();
                return _blackFilter;
            }
        }
        // Cached instance backing RedFilter; created on first access.
        static HtmlFilter _redFilter;

        /// <summary>
        /// A clone of <c>TextFilterWithBreaks</c>, created on first access and cached
        /// in a static field.
        /// </summary>
        public static HtmlFilter RedFilter {
            get {
                if (_redFilter != null) {
                    return _redFilter;
                }

                _redFilter = TextFilterWithBreaks.Clone();
                return _redFilter;
            }
        }
        // Cached instance backing YellowFilter; created on first access.
        static HtmlFilter _yellowFilter;

        /// <summary>
        /// Filter with <c>FilterTags</c> and <c>FilterAttributes</c> switched on that
        /// allows headings, br/hr, img, html/body, p/div, basic inline formatting,
        /// links and list tags, and replaces b with strong and s/strike with del.
        /// The instance is created on first access and cached in a static field.
        /// </summary>
        public static HtmlFilter YellowFilter {
            get {
                if (_yellowFilter != null) {
                    return _yellowFilter;
                }

                HtmlFilter filter = new HtmlFilter { FilterTags = true, FilterAttributes = true };

                // Headings.
                foreach (string heading in new[] { "h1", "h2", "h3", "h4", "h5", "h6" }) {
                    filter.AddRule_AllowTag(heading, true);
                }

                // NOTE(review): the bool argument is false exactly for br, hr and img here;
                // presumably it marks tags without content - confirm against HtmlFilter.
                filter.AddRule_AllowTag("br", false);
                filter.AddRule_AllowTag("hr", false);
                // NOTE(review): the trailing array looks like the permitted attributes and the
                // third argument like the permitted parent tags - confirm against HtmlFilter.
                filter.AddRule_AllowTag("img", false, null, new[] { "src" });
                filter.AddRule_AllowTag("html", true);
                filter.AddRule_AllowTag("body", true, new[] { "html" });

                // Block containers and inline formatting, with legacy tags replaced by
                // their modern counterparts.
                filter.AddRule_AllowTag("p", true);
                filter.AddRule_AllowTag("div", true);
                filter.AddRule_AllowTag("strong", true);
                filter.AddRule_ReplaceTagWithTag("b", "strong");
                filter.AddRule_AllowTag("i", true);
                filter.AddRule_AllowTag("em", true);
                filter.AddRule_AllowTag("u", true);
                filter.AddRule_AllowTag("del", true);
                filter.AddRule_ReplaceTagWithTag("s", "del");
                filter.AddRule_ReplaceTagWithTag("strike", "del");

                // Links.
                filter.AddRule_AllowTag("a", true, null, new[] { "href", "target" });

                // Lists.
                foreach (string listTag in new[] { "dir", "menu", "dl", "dt", "dd", "li", "ul", "ol" }) {
                    filter.AddRule_AllowTag(listTag, true);
                }

                _yellowFilter = filter;
                return _yellowFilter;
            }
        }
        // Cached instance backing GreenFilter; created on first access.
        static HtmlFilter _greenFilter;

        /// <summary>
        /// A clone of <c>YellowFilter</c> extended with rules for table, tbody, tr, td,
        /// img, span, small, sub, sup and ins, with <c>FilterTags</c> set to true afterwards.
        /// The instance is created on first access and cached in a static field.
        /// </summary>
        public static HtmlFilter GreenFilter {
            get {
                if (_greenFilter != null) {
                    return _greenFilter;
                }

                HtmlFilter filter = YellowFilter.Clone();

                // Table structure.
                // NOTE(review): the arrays look like the permitted parent tags - confirm.
                filter.AddRule_AllowTag("table", true);
                filter.AddRule_AllowTag("tbody", true, new[] { "table" });
                filter.AddRule_AllowTag("tr", true, new[] { "table", "tbody" });
                filter.AddRule_AllowTag("td", true, new[] { "tr" });

                // NOTE(review): img is registered again here without the attribute list that
                // YellowFilter passes; how this interacts with the cloned rule is not visible here.
                filter.AddRule_AllowTag("img", false);

                // Additional inline tags.
                foreach (string inlineTag in new[] { "span", "small", "sub", "sup", "ins" }) {
                    filter.AddRule_AllowTag(inlineTag, true);
                }

                filter.FilterTags = true;
                _greenFilter = filter;
                return _greenFilter;
            }
        }
        // Cached instance backing WhiteFilter; created on first access.
        static HtmlFilter _whiteFilter;

        /// <summary>
        /// Filter that registers allow rules for meta, br, hr, input and img and ends up
        /// with <c>FilterTags</c> and <c>FilterAttributes</c> both set to false.
        /// The instance is created on first access and cached in a static field.
        /// </summary>
        public static HtmlFilter WhiteFilter {
            get {
                if (_whiteFilter != null) {
                    return _whiteFilter;
                }

                // Both flags are on while the rules are registered and switched off afterwards.
                // NOTE(review): whether rule registration depends on the flags is not visible
                // here, so the original on/off sequence is kept as is.
                HtmlFilter filter = new HtmlFilter { FilterTags = true, FilterAttributes = true };

                foreach (string tag in new[] { "meta", "br", "hr", "input", "img" }) {
                    filter.AddRule_AllowTag(tag, false);
                }

                filter.FilterTags = false;
                filter.FilterAttributes = false;

                _whiteFilter = filter;
                return _whiteFilter;
            }
        }
58 posts

OK, is it the Html Agility Pack? I'll have a look at it! Thanks! :)

 

120 posts

No, it's an internal class (faster and lighter!)

1