LINQ - Projection eases parsing HTML content into data collection

With regard to my http://www.CodePlex.com/MAMLConverter project, I have completed the Workflow infrastructure (integrated Unity and logging).  The next requirement is to pass the Workflow a collection of HTML data so that it can translate HTML tags to MAML tags.   The entity I'm going to start with follows:

namespace Workflow.Library.Entities

{

    /// <summary>
    /// Plain data holder describing one element parsed out of HTML content.
    /// The class has no behavior of its own; every member is a read/write
    /// auto-property filled in by whoever builds the collection.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the code that populates this entity is not part of this
    /// listing, so the member descriptions below are inferred from the
    /// property names and should be confirmed against the populating query.
    /// </remarks>
    public class ElementEnt

    {

        /// <summary>Presumably true when the element is a closing tag (e.g. "/p") - TODO confirm.</summary>
        public bool IsCloseTag { get; set; }

        /// <summary>Presumably the tag name - TODO confirm against the populating query.</summary>
        public string Tag { get; set; }

        /// <summary>Presumably the length of the tag portion of the element - TODO confirm.</summary>
        public int TagLen { get; set; }

        /// <summary>Presumably the length of the whole element text - TODO confirm.</summary>
        public int Len { get; set; }

        /// <summary>Presumably the text that follows the tag - TODO confirm.</summary>
        public string Content { get; set; }

        /// <summary>Presumably the unmodified source text this element was parsed from - TODO confirm.</summary>
        public string OriginalContent { get; set; }

    }

}

This Use Case requirement will have me convert the following HTML into a List<ElementEnt> collection:

With the help of my StringExtensions class (many of the extensions written specifically for this task) I'm able to get my results with the query that follows:

Now all that remains is to populate my List<ElementEnt> collection.   LINQ projection makes this task easy with the following few code changes:

All that remains is to move this code into its own class and create the Unit Test.  I'll then be ready to create the Workflow activities that will handle each of the tags.

String Extensions source follows:

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using System.IO;

using System.Web;

 

namespace Workflow.Library.Extensions

{

    public static class StringExtensions

    {

        /// <summary>

        /// HTMLs the decode.

        /// </summary>

        /// <param name="data">The data.</param>

        /// <returns></returns>

        public static string HtmlDecode(this string data)

        {

            return HttpUtility.HtmlDecode(data);

        }

        /// <summary>

        /// HTMLs the encode.

        /// </summary>

        /// <param name="data">The data.</param>

        /// <returns></returns>

        public static string HtmlEncode(this string data)

        {

            return HttpUtility.HtmlEncode(data);

        }

 

 

        /// <summary>

        /// Gets the length of the tag.

        /// </summary>

        /// <param name="da">The da.</param>

        /// <returns></returns>

        public static int GetTagLength(this string da)

        {

            int offsetSP = da.IndexOf(' ');

            int offset = da.IndexOf('>');

            if (offsetSP > 0 && offsetSP < offset)

                offset = offsetSP;

            if (offset < 1)

                offset = da.Length;

            //return offset;

            return offset;

        }

 

        /// <summary>

        /// Gets the tag.

        /// </summary>

        /// <param name="da">The da.</param>

        /// <returns></returns>

        public static string GetTag(this string da)

        {

            int offset = GetTagLength(da);

            string retValue = da.Substring(0, offset);

            if (retValue.StartsWith("/"))

                retValue = retValue.Substring(1);

            if (retValue.EndsWith(">"))

                retValue = retValue.Substring(0, retValue.Length - 1);

            return retValue;

        }

 

        /// <summary>

        /// Gets the content.

        /// </summary>

        /// <param name="da">The da.</param>

        /// <returns></returns>

        public static string GetContent(this string da)

        {

            int offset = da.GetTagLength() + 1;

            if (offset >= da.Length)

                return "";

            else

                return da.Substring(offset);

        }

 

 

        /// <summary>

        /// Send string to specified filename

        /// </summary>

        /// <param name="data"></param>

        /// <param name="fileName"></param>

        /// <returns></returns>

        public static bool StrToFile(this string data, string fileName)

        {

            //Check if the sepcified file exists

            if (System.IO.File.Exists(fileName) == true)

            {

                // If so then Erase the file first as in this case

                // we are overwriting

                System.IO.File.Delete(fileName);

            }

 

            //Create the file if it does not exist and open it

            FileStream oFs = new

                FileStream(fileName, FileMode.CreateNew, FileAccess.ReadWrite);

 

            //Create a writer for the file

            StreamWriter oWriter = new StreamWriter(oFs);

 

            //Write the contents

            oWriter.Write(data);

            oWriter.Flush();

            oWriter.Close();

 

            oFs.Close();

 

            return true;

        }

 

        /// <summary>

        /// Return file contents as string

        /// </summary>

        /// <param name="cFileName"></param>

        /// <returns></returns>

        public static string FileToStr(this string cFileName)

        {

            //Create a StreamReader and open the file

            StreamReader oReader = System.IO.File.OpenText(cFileName);

 

            //Read all the contents of the file in a string

            string lcString = oReader.ReadToEnd();

 

            //Close the StreamReader and return the string

            oReader.Close();

            return lcString;

        }

    }

}

 


Tags: , , ,
Categories:


Actions: E-mail | Permalink |  Grammar/Typo/Better way? Please let me know