Resource / URL Qualifier
Last edited on 9 December 2008

From blog post "Normalizer of Web Pages, Qualifier of URLs".

/*
	Resource Qualifier - By Forrest Croce, December 2008.
	This code is released as open source;  please attribute the author and source.
*/

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlAgilityPack;
using System.Net;

namespace FullyQualifyNetworkResourceAddresses {
	/// <summary>
	/// Rewrites the relative <c>src</c> and <c>href</c> attribute values of an HTML page
	/// into qualified URLs, using <see cref="SourceUrl"/> as the base address.
	/// </summary>
	/// <remarks>
	/// The markup to process is taken from <see cref="OriginalHtml"/> when it has been
	/// supplied; otherwise <see cref="Code"/> asks <c>WebUtility.GetPageCode</c> for it.
	/// The processed markup is cached until either input property is assigned again.
	/// </remarks>
	public sealed class ResourceQualifier {
		string sourceUrl, originalHtml, cleanHtml;
		Uri sourceUri;

		/// <summary>
		/// Creates an empty qualifier; assign <see cref="SourceUrl"/> and/or
		/// <see cref="OriginalHtml"/> before calling <see cref="Code"/>.
		/// </summary>
		public ResourceQualifier() { }

		/// <summary>Creates a qualifier for the page at <paramref name="url"/>.</summary>
		/// <param name="url">Absolute address of the page; assigned to <see cref="SourceUrl"/>.</param>
		/// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
		/// <exception cref="UriFormatException"><paramref name="url"/> cannot be parsed by <see cref="Uri"/>.</exception>
		public ResourceQualifier(string url) {
			SourceUrl = url;
		}


		/// <summary>
		/// Address of the page being processed. Assigning it discards any cached
		/// <see cref="CleanHtml"/>.
		/// </summary>
		/// <exception cref="ArgumentNullException">The assigned value is null.</exception>
		/// <exception cref="UriFormatException">The assigned value cannot be parsed by <see cref="Uri"/>.</exception>
		public string SourceUrl {
			get { return sourceUrl; }
			set {
				// Parse first: if the value is rejected, the exception leaves every field untouched.
				sourceUri = new Uri(value);
				sourceUrl = value;
				cleanHtml = null;
			}
		}


		/// <summary>
		/// Unprocessed markup of the page. Assigning it discards any cached
		/// <see cref="CleanHtml"/>.
		/// </summary>
		public string OriginalHtml {
			get { return originalHtml; }
			set {
				originalHtml = value;
				cleanHtml = null;
			}
		}

		/// <summary>
		/// Result cached by the last successful <see cref="Code"/> call; null after
		/// <see cref="SourceUrl"/> or <see cref="OriginalHtml"/> has been assigned.
		/// </summary>
		public string CleanHtml {
			get { return cleanHtml; }
			internal set { cleanHtml = value; }
		}


		/// <summary>
		/// Returns the page markup with relative <c>src</c>/<c>href</c> values qualified.
		/// </summary>
		/// <returns>
		/// The cached result when one exists; otherwise the freshly processed markup.
		/// Null when the markup has to be downloaded and no source address is set, the
		/// download throws, or the download yields null.
		/// </returns>
		public string Code() {
			// A cached value that is empty or only whitespace is treated as "not computed yet".
			if (!string.IsNullOrEmpty(cleanHtml) && cleanHtml.Trim().Length != 0)
				return cleanHtml;

			if (string.IsNullOrEmpty(originalHtml)) {
				// Nothing to download from: report failure the same way a failed download does.
				if (sourceUri == null)
					return null;

				try {
					originalHtml = WebUtility.GetPageCode(sourceUri);
				} catch (Exception) {
					// Best effort by design: any download failure is reported as null.
					return null;
				}

				// Do not hand a null string to the parser.
				if (originalHtml == null)
					return null;
			}

			// NOTE(review): when OriginalHtml was supplied without a SourceUrl, relative
			// links are still passed to WebUtility.Qualify with a null base - confirm how
			// that helper reacts.
			HtmlDocument doc = WebUtility.GetPage(originalHtml);
			RecursiveQualifier(doc.DocumentNode);

			cleanHtml = doc.DocumentNode.OuterHtml;
			return cleanHtml;
		}

		/// <summary>Qualifies <paramref name="node"/> and then each of its descendants, depth first.</summary>
		private void RecursiveQualifier(HtmlNode node) {
			QualifyNode(node);

			foreach (HtmlNode child in node.ChildNodes)
				RecursiveQualifier(child);
		}

		/// <summary>
		/// Replaces every relative <c>src</c> or <c>href</c> value on <paramref name="node"/>
		/// with its qualified form. Absolute and malformed values are left unchanged.
		/// </summary>
		private void QualifyNode(HtmlNode node) {
			if (!node.HasAttributes)
				return;

			foreach (HtmlAttribute a in node.Attributes) {
				bool isLink = string.Equals(a.Name, "src", StringComparison.OrdinalIgnoreCase)
					|| string.Equals(a.Name, "href", StringComparison.OrdinalIgnoreCase);
				if (!isLink)
					continue;

				string value = a.Value;
				if (!Uri.IsWellFormedUriString(value, UriKind.RelativeOrAbsolute))
					continue;

				// TryCreate instead of the constructor so an unparseable value can never throw here.
				Uri parsed;
				if (!Uri.TryCreate(value, UriKind.RelativeOrAbsolute, out parsed) || parsed.IsAbsoluteUri)
					continue;

				a.Value = QualifyUrl(value).ToString();
			}
		}

		/// <summary>Resolves <paramref name="url"/> against <see cref="SourceUrl"/> via <c>WebUtility.Qualify</c>.</summary>
		private Uri QualifyUrl(string url) {
			return WebUtility.Qualify(sourceUrl, url);
		}

	}
}