本文整理汇总了 C# 中 Sgml.SgmlReader 类的典型用法代码示例。如果您正苦于以下问题:C# SgmlReader 类的具体用法?C# SgmlReader 怎么用?C# SgmlReader 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
SgmlReader 类属于 Sgml 命名空间,在下文中一共展示了 SgmlReader 类的 20 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的 C# 代码示例。
示例1: Main
/// <summary>
/// Loads the RavenDB HTTP API documentation index page through <c>SgmlReader</c>, wraps the
/// contents of its <c>div class='layout'</c> element in a JSON document, hands the anchors found
/// inside that element to <c>AddDocumentsFromLinks</c>, and writes the resulting JSON array to disk.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the output file path.</param>
/// <exception cref="ArgumentException">No output path was supplied.</exception>
/// <exception cref="InvalidOperationException">The page has no <c>div class='layout'</c> element.</exception>
static void Main(string[] args)
{
    // Fail fast: the output path is only used at the very end, after all the network work.
    if (args == null || args.Length == 0)
    {
        throw new ArgumentException("Expected the output file path as the first argument.", "args");
    }

    var array = new JArray();
    var crawled = new HashSet<string>();
    var doc = new XmlDocument();

    // Dispose the reader as soon as the document is materialized.
    using (var sgmlReader = new SgmlReader
    {
        Href = "http://groups.google.com/group/ravendb/web/docs-http-api-index"
    })
    {
        crawled.Add(sgmlReader.Href);
        doc.Load(sgmlReader);
    }

    var layout = doc.SelectSingleNode("//div[@class='layout']");
    if (layout == null)
    {
        // SelectSingleNode returns null when nothing matches; report that explicitly
        // instead of failing later with a NullReferenceException.
        throw new InvalidOperationException("The index page does not contain a div with class 'layout'.");
    }

    var index = new JObject(
        new JProperty("Html", FixLinks(layout.InnerXml)),
        new JProperty("Name", "Index"));

    array.Add(new JObject(
        new JProperty("DocId", "raven_documentation/index"),
        new JProperty("Document", index),
        new JProperty("Type", "raven documentation"),
        new JProperty("Metadata",
            new JObject(new JProperty("Raven-View-Template", "/raven/JSONTemplates/documentation.html")))
    ));

    AddDocumentsFromLinks(array, crawled, layout.SelectNodes(".//a"));
    File.WriteAllText(args[0], array.ToString(Formatting.Indented));
}
开发者ID:torkelo,项目名称:ravendb,代码行数:28,代码来源:Program.cs
示例2: Create
/// <summary>
/// Builds an <c>SgmlReader</c> over the given HTML text, using the "Html.dtd" resource embedded
/// in the assembly that defines <c>SgmlReader</c>.
/// </summary>
/// <param name="baseUri">Base URI used when parsing the DTD and set on the returned reader.</param>
/// <param name="html">HTML markup to read.</param>
/// <returns>The configured reader, positioned before the first node.</returns>
public static XmlReader Create(string baseUri, string html)
{
    const string dtdResourceName = "Html.dtd";

    SgmlDtd htmlDtd;
    using (Stream dtdStream = typeof(SgmlReader).Assembly.GetManifestResourceStream(dtdResourceName))
    {
        var dtdText = new StreamReader(dtdStream);
        htmlDtd = SgmlDtd.Parse(new Uri(baseUri), "HTML", dtdText, null, null, null);
    }

    // Properties are assigned in the same order as the original object initializer.
    var sgml = new SgmlReader();
    sgml.WhitespaceHandling = WhitespaceHandling.All;
    sgml.CaseFolding = CaseFolding.ToLower;
    sgml.Dtd = htmlDtd;
    sgml.IgnoreDtd = true;
    sgml.InputStream = new StringReader(html);
    sgml.SetBaseUri(baseUri);
    return sgml;
}
开发者ID:JesusPanDeVida,项目名称:VocabularioTeologiaBiblica,代码行数:25,代码来源:SgmlFactory.cs
示例3: ParseHtml
/// <summary>
/// Creates an <see cref="XmlDocument"/> from HTML content. The content is wrapped in a synthetic
/// <c>&lt;root&gt;</c> element before being run through <c>SgmlReader</c>.
/// </summary>
/// <param name="sContent">HTML markup to convert.</param>
/// <returns>A whitespace-preserving document built from the converted markup.</returns>
public static XmlDocument ParseHtml(string sContent)
{
    string xml;

    // All readers/writers are scoped with using so they are released even if parsing throws.
    using (StringReader sr = new StringReader("<root>" + sContent + "</root>"))
    using (SgmlReader reader = new SgmlReader())
    using (StringWriter sw = new StringWriter())
    {
        reader.WhitespaceHandling = WhitespaceHandling.All;
        reader.CaseFolding = Sgml.CaseFolding.ToLower;
        reader.InputStream = sr;

        using (XmlTextWriter w = new XmlTextWriter(sw))
        {
            w.Formatting = Formatting.Indented;
            w.WriteStartDocument();

            // WriteNode advances the reader itself, so only the first Read() is explicit.
            reader.Read();
            while (!reader.EOF)
            {
                w.WriteNode(reader, true);
            }
            w.Flush();
        }

        // Read the text only after the XML writer has been closed, as the original did.
        xml = sw.ToString();
    }

    // create document
    XmlDocument doc = new XmlDocument();
    doc.PreserveWhitespace = true;
    doc.XmlResolver = null;
    doc.LoadXml(xml);
    return doc;
}
开发者ID:Cabana,项目名称:CMSConverter,代码行数:33,代码来源:SgmlUtil.cs
示例4: Parser
/// <summary>
/// Initializes the parser with an <c>SgmlReader</c> set to the "HTML" doc type,
/// full whitespace reporting and lower-case folding.
/// </summary>
public Parser()
{
    _sgmlReader = new SgmlReader
    {
        DocType = "HTML",
        WhitespaceHandling = WhitespaceHandling.All,
        CaseFolding = CaseFolding.ToLower
    };
}
开发者ID:benogle,项目名称:html2markup,代码行数:7,代码来源:Parser.cs
示例5: Main
/// <summary>
/// Benchmark entry point: reads a file once, then parses it into an <see cref="XmlDocument"/>
/// through <c>SgmlReader</c> the requested number of times and prints the elapsed seconds.
/// </summary>
/// <param name="args"><c>args[0]</c> is the input file name, <c>args[1]</c> the iteration count.</param>
static void Main(string[] args)
{
    if (args.Length < 2) {
        Console.WriteLine("Usage: BenchSgmlReader.exe filename iterations");
        return;
    }

    string text = File.ReadAllText(args[0]);
    // The count is machine-readable input, so parse it culture-independently.
    int n = int.Parse(args[1], CultureInfo.InvariantCulture);

    // Stopwatch is monotonic; DateTime.Now can jump with clock adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    for (int i = 0; i < n; i++) {
        SgmlReader sgmlReader = new SgmlReader();
        sgmlReader.DocType = "HTML";
        sgmlReader.WhitespaceHandling = WhitespaceHandling.All;
        sgmlReader.InputStream = new StringReader(text);
        XmlDocument doc = new XmlDocument();
        doc.PreserveWhitespace = true;
        doc.XmlResolver = null;
        doc.Load(sgmlReader);
    }
    stopwatch.Stop();

    Console.WriteLine("{0} s", stopwatch.Elapsed.TotalSeconds.ToString(CultureInfo.InvariantCulture));
}
开发者ID:FrameworkBy,项目名称:html-parsers-benchmark,代码行数:31,代码来源:BenchSgmlReader.cs
示例6: GetPv
/// <summary>
/// Logs in to Hatena with the credentials from app settings, downloads the counter page for the
/// given counter and date, and returns the number found in the first <c>strong</c> element under
/// the table whose first attribute has the value "totalcount".
/// </summary>
/// <param name="cid">Counter id substituted into <c>CounterUrlBase</c>.</param>
/// <param name="date">Date substituted into <c>CounterUrlBase</c> as yyyy-MM-dd.</param>
/// <returns>The parsed count.</returns>
public static int GetPv(int cid, DateTime date)
{
    var hatenaId = ConfigurationManager.AppSettings["hatenaId"];
    var hatenaPassword = ConfigurationManager.AppSettings["hatenaPassword"];

    // NOTE(review): if CustomWebClient is disposable (it looks like a WebClient subclass), wrap it in using - confirm.
    var wc = new CustomWebClient() { Encoding = Encoding.UTF8 };
    wc.Headers.Add("Content-Type", "application/x-www-form-urlencoded");
    var data = string.Format(LoginParamBase, hatenaId, hatenaPassword);
    wc.UploadString("https://www.hatena.ne.jp/login", "POST", data);

    // Format the date with the invariant culture so the URL does not depend on the
    // current culture's calendar.
    var dateText = date.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
    var url = string.Format(CounterUrlBase, hatenaId, cid, dateText);
    var res = wc.DownloadString(url);

    XDocument xml;
    using (var sgml = new SgmlReader() { IgnoreDtd = true })
    {
        sgml.InputStream = new StringReader(res);
        xml = XDocument.Load(sgml);
    }

    var ns = xml.Root.Name.Namespace;
    var count = xml.Descendants(ns + "table")
        // FirstAttribute is null for a table without attributes; skip those instead of crashing.
        .Where(x => x.FirstAttribute != null && x.FirstAttribute.Value == "totalcount")
        .Descendants(ns + "strong")
        .First().Value;
    return int.Parse(count);
}
开发者ID:nakaji,项目名称:nakaji-api,代码行数:27,代码来源:HatenaCounterHelper.cs
示例7: GetWellFormedHTML
/// <summary>
/// Converts HTML to well-formed XML text through <c>SgmlReader</c> and optionally filters it
/// with an XPath expression.
/// </summary>
/// <param name="html">HTML markup to convert.</param>
/// <param name="xpathNavPath">
/// XPath expression to apply to the converted markup, or <c>null</c> to return the whole
/// converted markup.
/// </param>
/// <returns>
/// The converted markup, or the newline-separated values of the selected nodes, with carriage
/// returns replaced by line feeds and doubled line feeds collapsed once. If any exception
/// occurs, its message is returned instead.
/// </returns>
public static string GetWellFormedHTML(string html, string xpathNavPath)
{
    try
    {
        string wellFormed;

        // using guarantees cleanup on success and failure alike; the previous catch block
        // closed variables that could still be null, which masked the original error.
        using (SgmlReader reader = new SgmlReader())
        using (StringWriter sw = new StringWriter())
        using (XmlTextWriter writer = new XmlTextWriter(sw))
        {
            reader.DocType = "HTML";
            reader.InputStream = new StringReader(html);
            writer.Formatting = Formatting.Indented;

            // WriteNode already moves the reader past the node it copies, so the reader is
            // advanced explicitly only to skip whitespace. Calling Read() after every
            // WriteNode would drop the node that follows each copied node.
            reader.Read();
            while (!reader.EOF)
            {
                if (reader.NodeType == XmlNodeType.Whitespace)
                {
                    reader.Read();
                }
                else
                {
                    writer.WriteNode(reader, true);
                }
            }

            writer.Flush();
            wellFormed = sw.ToString();
        }

        string result;
        if (xpathNavPath == null)
        {
            result = wellFormed;
        }
        else
        {
            // Filter out nodes from HTML
            StringBuilder sb = new StringBuilder();
            XPathDocument doc = new XPathDocument(new StringReader(wellFormed));
            XPathNavigator nav = doc.CreateNavigator();
            XPathNodeIterator nodes = nav.Select(xpathNavPath);
            while (nodes.MoveNext())
            {
                sb.Append(nodes.Current.Value + "\n");
            }
            result = sb.ToString();
        }

        result = result.Replace("\r", "\n");
        result = result.Replace("\n\n", "\n");
        return result;
    }
    catch (Exception exp)
    {
        // Existing contract: failures are reported to the caller as the exception message.
        return exp.Message;
    }
}
开发者ID:drzo,项目名称:opensim4opencog,代码行数:59,代码来源:HttpUtil.cs
示例8: HtmlReader
/// <summary>
/// Constructor. Downloads the page at <paramref name="url"/> and parses it into <c>Html</c>.
/// </summary>
/// <param name="url">URL to fetch.</param>
/// <param name="follow">Whether to consult robots.txt before fetching.</param>
/// <param name="agent">User agent; when null the request's default is left untouched.</param>
/// <param name="encoding">
/// Encoding used to decode the response. When null, the response's declared character set is
/// used, falling back to UTF-8 if that character set is missing or not recognized.
/// </param>
/// <exception cref="RobotsDisallowException"><c>robots.Parse(url)</c> returned false.</exception>
public HtmlReader(string url, bool follow = true, UserAgent agent = null, Encoding encoding = null)
{
    // HTTP request
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);

    // User agent
    if (agent != null)
    {
        req.UserAgent = agent.ToString();
    }

    // robots.txt
    Robots robots = (follow) ? Robots.Create(new Uri(url)) : null;
    if (robots != null) {
        if (!robots.Parse(url))
        {
            throw new RobotsDisallowException("Robots Disallow [" + url + "]");
        }
        if (robots.CrawlDelay != 0)
        {
            // CrawlDelay is multiplied by 1000 to get the sleep time in milliseconds.
            System.Threading.Thread.Sleep(robots.CrawlDelay * 1000);
        }
    }

    using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
    using (Stream stream = res.GetResponseStream()) {
        Encoding enc = encoding;
        if (enc == null) {
            try {
                enc = Encoding.GetEncoding(res.CharacterSet);
            }
            catch (ArgumentException) {
                // CharacterSet can be null, empty or an unknown name; GetEncoding throws
                // ArgumentException (or its subclass ArgumentNullException) in those cases.
                enc = Encoding.UTF8;
            }
        }

        using (StreamReader reader = new StreamReader(stream, enc))
        using (SgmlReader sgml = new SgmlReader {
            DocType = "HTML",
            InputStream = reader,
            CaseFolding = CaseFolding.ToLower,
            IgnoreDtd = true
        }) {
            Html = XDocument.Load(sgml, LoadOptions.None);
            Uri = url;
            Encoding = enc;
        }
    }
}
开发者ID:t-kojima,项目名称:WebScrapingLibrary,代码行数:38,代码来源:HtmlReader.cs
示例9: GetXmlFromHtmlString
/// <summary>
/// Feeds the HTML text to an <c>SgmlReader</c> and returns what its <c>ReadOuterXml</c> yields.
/// </summary>
/// <param name="html">HTML markup to convert.</param>
/// <returns>The string produced by <c>ReadOuterXml</c>.</returns>
public static String GetXmlFromHtmlString (String html)
{
    using (SgmlReader sgml = new SgmlReader())
    {
        sgml.InputStream = new StringReader(html);
        String outerXml = sgml.ReadOuterXml();
        return outerXml;
    }
}
开发者ID:xxjeng,项目名称:nuxleus,代码行数:8,代码来源:HttpSgmlToXml.cs
示例10: ParseHtml
/// <summary>
/// Parses HTML from a text reader into an <see cref="XDocument"/> using <c>SgmlReader</c>
/// with the "HTML" doc type and lower-case folding.
/// </summary>
/// <param name="reader">Source of the HTML markup.</param>
/// <returns>The loaded document.</returns>
static XDocument ParseHtml(TextReader reader)
{
    using (var sgml = new SgmlReader
    {
        DocType = "HTML",
        CaseFolding = CaseFolding.ToLower,
        InputStream = reader
    })
    {
        XDocument parsed = XDocument.Load(sgml);
        return parsed;
    }
}
开发者ID:Rapids,项目名称:Verde,代码行数:8,代码来源:GlobalForm.cs
示例11: ParseHtml
/// <summary>
/// Loads HTML from the supplied reader into an <see cref="XDocument"/> via <c>SgmlReader</c>
/// configured for the "HTML" doc type with lower-case folding.
/// </summary>
/// <param name="_Reader">Source of the HTML markup.</param>
/// <returns>The loaded document.</returns>
private static XDocument ParseHtml( TextReader _Reader )
{
    using ( var html = new SgmlReader() )
    {
        // Same assignment order as the original object initializer.
        html.DocType = "HTML";
        html.CaseFolding = CaseFolding.ToLower;
        html.InputStream = _Reader;

        XDocument document = XDocument.Load( html );
        return document;
    }
}
开发者ID:TatsuyaHoshina,项目名称:Tatsuya.IIDX,代码行数:10,代码来源:IIDXWeb.cs
示例12: SetUp
/// <summary>
/// Creates a fresh <c>SgmlReader</c> for the test: lower-case folding, "HTML" doc type,
/// whitespace nodes suppressed.
/// </summary>
public void SetUp()
{
    var reader = new SgmlReader();
    reader.CaseFolding = CaseFolding.ToLower;
    reader.DocType = "HTML";
    reader.WhitespaceHandling = WhitespaceHandling.None;

    // Publish the reader only once it is fully configured, as the object initializer did.
    _sgmlReader = reader;
}
开发者ID:panuganti,项目名称:nreadability,代码行数:10,代码来源:SgmlReaderTests.cs
示例13: FetchXmlDocument
/// <summary>
/// Fetches the text at <paramref name="url"/> through <c>FetchWebText</c> and loads it into an
/// <see cref="XmlDocument"/> via <c>SgmlReader</c>.
/// </summary>
/// <param name="url">Address passed to <c>FetchWebText</c>.</param>
/// <returns>The loaded document.</returns>
XmlDocument FetchXmlDocument(Uri url)
{
    // using releases both readers even when Load throws; the explicit Close calls it
    // replaces were skipped on failure.
    using (var sr = FetchWebText (url))
    using (var xr = new SgmlReader () { InputStream = sr })
    {
        var doc = new XmlDocument ();
        doc.Load (xr);
        return doc;
    }
}
开发者ID:atsushieno,项目名称:monodroid-schema-gen,代码行数:10,代码来源:type-hierarchy-importer.cs
示例14: FetchHtmlFromUrlAsXDocument
/// <summary>
/// Downloads the resource at <paramref name="url"/> and parses it as HTML into an
/// <see cref="XDocument"/> using <c>SgmlReader</c> with lower-case folding.
/// </summary>
/// <param name="url">Address to request.</param>
/// <returns>The loaded document.</returns>
public static XDocument FetchHtmlFromUrlAsXDocument(string url)
{
    var webRequest = WebRequest.Create(url);

    // The response owns the underlying connection, so it must be disposed as well as the reader.
    using (var response = webRequest.GetResponse())
    using (var reader = new StreamReader(response.GetResponseStream()))
    using (var sgml = new SgmlReader())
    {
        sgml.DocType = "HTML";
        sgml.CaseFolding = CaseFolding.ToLower;
        sgml.InputStream = reader;

        // XDocument.Load already returns a fresh document; no extra copy is needed.
        return XDocument.Load(sgml);
    }
}
开发者ID:blanciq,项目名称:serialz,代码行数:12,代码来源:RequestHelper.cs
示例15: FindImgs
/// <summary>
/// Scans HTML markup for <c>img</c> elements and collects their <c>src</c>, <c>width</c> and
/// <c>height</c> attributes.
/// </summary>
/// <param name="htmlCode">HTML markup to scan.</param>
/// <returns>
/// One <c>ImageInfo</c> per <c>img</c> element that has a non-empty <c>src</c>, in document order.
/// </returns>
internal static ImageInfo[] FindImgs(
    string htmlCode)
{
    var al = new List<ImageInfo>();

    // Dispose the reader (and the StringReader it wraps) when scanning is done.
    using (var r =
        new SgmlReader
        {
            DocType = @"HTML",
            InputStream = new StringReader(htmlCode)
        })
    {
        //find <img src=""
        while (r.Read())
        {
            if (r.NodeType != XmlNodeType.Element)
            {
                continue;
            }
            if (string.Compare(r.Name, @"img", StringComparison.OrdinalIgnoreCase) != 0)
            {
                continue;
            }
            if (!r.HasAttributes)
            {
                continue;
            }

            var ii = new ImageInfo();
            while (r.MoveToNextAttribute())
            {
                switch (r.Name.ToLowerInvariant())
                {
                    case @"src":
                        ii.Source = r.Value;
                        break;
                    case @"width":
                        ii.Width = ConvertHelper.ToInt32(r.Value);
                        break;
                    case @"height":
                        ii.Height = ConvertHelper.ToInt32(r.Value);
                        break;
                }
            }

            // Images without a source are not reported.
            if (!string.IsNullOrEmpty(ii.Source))
            {
                al.Add(ii);
            }
        }
    }

    return al.ToArray();
}
开发者ID:jorik041,项目名称:ZetaHtmlEditControl,代码行数:51,代码来源:HtmlConversionHelper.cs
示例16: CanParseResponseAsXML
/// <summary>
/// Verifies that the contents of "fixture.txt" can be loaded into an <see cref="XDocument"/>
/// through <c>SgmlReader</c>.
/// </summary>
public void CanParseResponseAsXML()
{
    // StreamReader(path) opens the file for reading only; FileStream(path, FileMode.Open)
    // requests read/write access and fails on a read-only fixture.
    using (var inputReader = new StreamReader("fixture.txt"))
    using (var reader = new SgmlReader())
    {
        reader.InputStream = inputReader;
        reader.CaseFolding = CaseFolding.ToLower;
        reader.DocType = "HTML";

        var document = XDocument.Load(reader);
        Assert.IsNotNull(document.ToString());
    }
}
开发者ID:blanciq,项目名称:serialz,代码行数:14,代码来源:WikiParserTests.cs
示例17: LoadHtmlPageAsXMLInternal
/// <summary>
/// Issues an HTTP request and returns the HTML response as an <see cref="XmlDocument"/>,
/// converted through <c>SgmlReader</c>.
/// </summary>
/// <param name="postData">Form body; only sent when <paramref name="httpMethod"/> is POST.</param>
/// <param name="uri">Address to request.</param>
/// <param name="httpMethod">HTTP method; compared to "POST" case-insensitively.</param>
/// <returns>The response body loaded as XML.</returns>
/// <exception cref="NotSupportedException">The proxy option is <c>UseIESettings</c>.</exception>
private XmlDocument LoadHtmlPageAsXMLInternal(string postData, string uri, string httpMethod)
{
    // Prepare web request...
    HttpWebRequest webrequest = (HttpWebRequest)WebRequest.Create(uri);

    // Deal with proxy details if any.
    if (_proxySettings.Option == ProxySettingsDTO.ProxyOption.UseIESettings)
    {
        throw new NotSupportedException("IE proxy settings are not supported by this module!");
    }
    if (_proxySettings.Option == ProxySettingsDTO.ProxyOption.Custom)
    {
        webrequest.Proxy = new WebProxy(_proxySettings.ProxyHost, _proxySettings.ProxyPort);
    }

    webrequest.Method = httpMethod;
    if (String.Equals(httpMethod, "POST", StringComparison.OrdinalIgnoreCase))
    {
        byte[] data = System.Text.Encoding.ASCII.GetBytes(postData);
        webrequest.ContentType = "application/x-www-form-urlencoded";
        webrequest.ContentLength = data.Length;
        using (Stream newStream = webrequest.GetRequestStream())
        {
            newStream.Write(data, 0, data.Length);
        }
    }

    // Read the whole body up front; using ensures the response and its stream are released
    // even if reading fails part-way.
    string html;
    using (HttpWebResponse webresponse = (HttpWebResponse)webrequest.GetResponse())
    using (StreamReader responseReader = new StreamReader(
        webresponse.GetResponseStream(), System.Text.Encoding.GetEncoding(1252)))
    {
        html = responseReader.ReadToEnd();
    }

    // Use the sgml reader to 'interpret' the HTML as XML.
    using (StringReader stringReader = new StringReader(html))
    using (SgmlReader sgmlReader = new SgmlReader())
    {
        sgmlReader.DocType = "HTML";
        sgmlReader.InputStream = stringReader;

        XmlDocument doc = new XmlDocument();
        doc.Load(sgmlReader);
        return doc;
    }
}
开发者ID:petegee,项目名称:AHPilotStats,代码行数:50,代码来源:HttpToXMLLoader.cs
示例18: getSessionId
// Gets (refreshes) the session ID: parses the stream as HTML and stores in session_id the
// "value" attribute of the first input element whose id is "com.sun.faces.VIEW"
// (null when there is no such element or it has no value attribute).
void getSessionId(Stream stream)
{
    var enc = System.Text.Encoding.UTF8;
    using (var reader = new StreamReader(stream, enc))
    using (var sgmlReader = new SgmlReader { InputStream = reader })
    {
        sgmlReader.DocType = "HTML";
        sgmlReader.CaseFolding = CaseFolding.ToLower;

        var doc = XDocument.Load(sgmlReader);
        var ns = doc.Root.Name.Namespace;

        // The explicit XAttribute-to-string conversion yields null for a missing attribute,
        // so neither a missing id nor a missing value can throw.
        session_id = doc.Descendants(ns + "input")
            .Where(el => (string)el.Attribute("id") == "com.sun.faces.VIEW")
            .Select(el => (string)el.Attribute("value"))
            .FirstOrDefault();
    }
}
开发者ID:nullpoo,项目名称:UNIPA_HACK,代码行数:17,代码来源:MainWindow.xaml.cs
示例19: BuildDocument
/// <summary>
/// Constructs a DOM (System.Xml.Linq.XDocument) from HTML markup.
/// </summary>
/// <param name="htmlContent">HTML markup from which the DOM is to be constructed.</param>
/// <returns>
/// System.Xml.Linq.XDocument instance which is a DOM of the provided HTML markup;
/// an empty document when the markup is empty or whitespace only.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="htmlContent"/> is null.</exception>
public XDocument BuildDocument(string htmlContent)
{
    if (htmlContent == null)
    {
        throw new ArgumentNullException("htmlContent");
    }

    if (string.IsNullOrWhiteSpace(htmlContent))
    {
        return new XDocument();
    }

    // "trim end" htmlContent to ...</html>$ (codinghorror.com puts some scripts after the </html> - sic!)
    // The search is ordinal and case-insensitive so that "</HTML>" is found too and the
    // result does not depend on the current culture.
    const string htmlEnd = "</html";
    int indexOfHtmlEnd = htmlContent.LastIndexOf(htmlEnd, StringComparison.OrdinalIgnoreCase);

    if (indexOfHtmlEnd != -1)
    {
        int indexOfHtmlEndBracket = htmlContent.IndexOf('>', indexOfHtmlEnd);

        if (indexOfHtmlEndBracket != -1)
        {
            htmlContent = htmlContent.Substring(0, indexOfHtmlEndBracket + 1);
        }
    }

    // load the document using sgml reader
    using (var sgmlReader = new SgmlReader())
    {
        sgmlReader.CaseFolding = CaseFolding.ToLower;
        sgmlReader.DocType = "HTML";
        sgmlReader.WhitespaceHandling = WhitespaceHandling.None;

        // Kept as a UTF-8 byte round-trip (rather than a StringReader) so the text reaches
        // the parser exactly as it did before.
        using (var sr = new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(htmlContent))))
        {
            sgmlReader.InputStream = sr;

            return XDocument.Load(sgmlReader);
        }
    }
}
开发者ID:panuganti,项目名称:nreadability,代码行数:48,代码来源:SgmlDomBuilder.cs
示例20: ReadHtmlAsXhtml
/// <summary>
/// Parses markup into an <see cref="XElement"/>. Markup that mentions the XHTML namespace
/// within its first 200 characters is parsed directly as XML; anything else is treated as
/// HTML and converted through <c>SgmlReader</c>.
/// </summary>
/// <param name="html">XHTML or HTML markup.</param>
/// <returns>The root element of the parsed markup.</returns>
public static XElement ReadHtmlAsXhtml(string html)
{
    //detect if xhtml by looking for namespace near start
    int namespaceIndex = html.IndexOf("http://www.w3.org/1999/xhtml", StringComparison.Ordinal);

    // IndexOf returns -1 when the namespace is absent; that must not count as "near the start",
    // otherwise every plain HTML document would be sent to the strict XML parser.
    if (namespaceIndex >= 0 && namespaceIndex < 200)
    {
        //must be xhtml, so just parse as xml
        return XElement.Parse(html);
    }

    //probably html, so parse as sgml
    using (SgmlReader sgml = new SgmlReader())
    {
        sgml.DocType = "HTML";
        sgml.WhitespaceHandling = WhitespaceHandling.All;
        sgml.CaseFolding = Sgml.CaseFolding.ToLower;
        sgml.InputStream = new StringReader(html);
        return XElement.Load(sgml);
    }
}
开发者ID:erikzaadi,项目名称:atomsitethemes.erikzaadi.com,代码行数:19,代码来源:WebHelper.cs
注:本文中的 Sgml.SgmlReader 类示例由纯净天空整理自 Github/MSDocs 等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。
请发表评论