// Last edited on 9 December 2008.
// From the blog post "Normalizer of Web Pages, Qualifier of URLs".
/*
Resource Qualifier - By Forrest Croce, December 2008.
This code is released as open source; please attribute the author and source.
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlAgilityPack;
using System.Net;
namespace FullyQualifyNetworkResourceAddresses {
/// <summary>
/// Rewrites the relative <c>src</c> and <c>href</c> attributes of an HTML document as
/// qualified URLs, resolved against the address the document came from.
/// </summary>
/// <remarks>
/// Typical use: set <see cref="SourceUrl"/> (and optionally <see cref="OriginalHtml"/>),
/// then call <see cref="Code"/>. The qualified markup is cached until either input changes.
/// </remarks>
public sealed class ResourceQualifier {
    private string sourceUrl, originalHtml, cleanHtml;
    private Uri sourceUri;

    /// <summary>Creates a qualifier with no source address and no markup.</summary>
    public ResourceQualifier() { }

    /// <summary>Creates a qualifier for the page at <paramref name="url"/>.</summary>
    /// <param name="url">Address of the page; it is assigned through <see cref="SourceUrl"/>.</param>
    public ResourceQualifier(string url) {
        SourceUrl = url;
    }

    /// <summary>
    /// Address of the page being processed. Relative URLs are qualified against it.
    /// Assigning a value discards any cached <see cref="CleanHtml"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    /// <exception cref="UriFormatException">The assigned value is not a valid absolute URI.</exception>
    public string SourceUrl {
        get { return sourceUrl; }
        set {
            // Parse first: if the Uri constructor throws, no field has been touched yet.
            sourceUri = new Uri(value);
            sourceUrl = value;
            cleanHtml = null;
        }
    }

    /// <summary>
    /// The unprocessed markup. When left null or empty, <see cref="Code"/> fetches it
    /// from the source address. Assigning a value discards any cached <see cref="CleanHtml"/>.
    /// </summary>
    public string OriginalHtml {
        get { return originalHtml; }
        set {
            originalHtml = value;
            cleanHtml = null;
        }
    }

    /// <summary>
    /// The most recently produced qualified markup, or null when nothing has been
    /// produced since the inputs last changed. Reading it never triggers processing.
    /// </summary>
    public string CleanHtml {
        get { return cleanHtml; }
        internal set { cleanHtml = value; }
    }

    /// <summary>
    /// Produces the markup with every relative <c>src</c>/<c>href</c> attribute qualified.
    /// </summary>
    /// <returns>
    /// The cached result when one exists and is not blank; otherwise the newly qualified
    /// markup. Returns null when the markup had to be fetched and fetching failed.
    /// </returns>
    public string Code() {
        // A blank cache entry is treated the same as no cache entry.
        if (cleanHtml != null && cleanHtml.Trim().Length != 0) {
            return cleanHtml;
        }

        if (string.IsNullOrEmpty(originalHtml)) {
            try {
                originalHtml = WebUtility.GetPageCode(sourceUri);
            } catch (Exception) {
                // Deliberate best effort: any failure to obtain the markup is reported as null.
                return null;
            }
        }

        HtmlDocument doc = WebUtility.GetPage(originalHtml);
        RecursiveQualifier(doc.DocumentNode);
        cleanHtml = doc.DocumentNode.OuterHtml;
        return cleanHtml;
    }

    /// <summary>Qualifies <paramref name="node"/> and then, depth first, all of its descendants.</summary>
    private void RecursiveQualifier(HtmlNode node) {
        QualifyNode(node);
        foreach (HtmlNode child in node.ChildNodes) {
            RecursiveQualifier(child);
        }
    }

    /// <summary>
    /// Replaces the value of each <c>src</c>/<c>href</c> attribute on <paramref name="node"/>
    /// that holds a well-formed relative URL with its qualified form.
    /// </summary>
    private void QualifyNode(HtmlNode node) {
        if (!node.HasAttributes) {
            return;
        }

        foreach (HtmlAttribute a in node.Attributes) {
            if (IsUrlAttribute(a.Name) && IsRelativeUrl(a.Value)) {
                a.Value = QualifyUrl(a.Value).ToString();
            }
        }
    }

    /// <summary>True when <paramref name="name"/> is <c>src</c> or <c>href</c>, ignoring case.</summary>
    private static bool IsUrlAttribute(string name) {
        return string.Equals(name, "src", StringComparison.OrdinalIgnoreCase)
            || string.Equals(name, "href", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// True when <paramref name="value"/> is a well-formed URI string that is not absolute.
    /// Values that are not well formed are reported as false and therefore left untouched.
    /// </summary>
    private static bool IsRelativeUrl(string value) {
        return Uri.IsWellFormedUriString(value, UriKind.RelativeOrAbsolute)
            && !new Uri(value, UriKind.RelativeOrAbsolute).IsAbsoluteUri;
    }

    /// <summary>Resolves <paramref name="url"/> against the source address.</summary>
    private Uri QualifyUrl(string url) {
        // NOTE(review): sourceUrl is null when only OriginalHtml was supplied; how
        // WebUtility.Qualify handles a null base is not visible here - confirm.
        return WebUtility.Qualify(sourceUrl, url);
    }
}
}