• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

C# Sgml.SgmlReader类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了C#中Sgml.SgmlReader的典型用法代码示例。如果您正苦于以下问题：C# SgmlReader类的具体用法？C# SgmlReader怎么用？C# SgmlReader使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



SgmlReader类属于Sgml命名空间,在下文中一共展示了SgmlReader类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C#代码示例。

示例1: Main

        /// <summary>
        /// Loads the documentation index page through <c>SgmlReader</c>, stores its
        /// <c>div.layout</c> content as the "Index" document, follows the links found in it
        /// via <c>AddDocumentsFromLinks</c>, and writes the resulting JSON array to a file.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the output file path.</param>
        static void Main(string[] args)
        {
            // Fail fast: the output path is only needed at the very end, but discovering that it
            // is missing after the whole crawl has run wastes all of that work.
            if (args == null || args.Length < 1)
            {
                System.Console.Error.WriteLine("Usage: <program> <output-file>");
                System.Environment.ExitCode = 1;
                return;
            }

            var array = new JArray();
            var crawled = new HashSet<string>();
            var doc = new XmlDocument();

            // SgmlReader is an XmlReader; dispose it so whatever it opened for Href is released.
            using (var sgmlReader = new SgmlReader
            {
                Href = "http://groups.google.com/group/ravendb/web/docs-http-api-index"
            })
            {
                crawled.Add(sgmlReader.Href);
                doc.Load(sgmlReader);
            }

            var layout = doc.SelectSingleNode("//div[@class='layout']");
            if (layout == null)
            {
                // SelectSingleNode returns null when nothing matches; report that clearly
                // instead of failing with a NullReferenceException below.
                throw new System.InvalidOperationException(
                    "The index page does not contain a <div class='layout'> element.");
            }

            var index = new JObject(
                new JProperty("Html", FixLinks(layout.InnerXml)),
                new JProperty("Name", "Index"));

            array.Add(new JObject(
                new JProperty("DocId", "raven_documentation/index"),
                new JProperty("Document", index),
                new JProperty("Type", "raven documentation"),
                new JProperty("Metadata",
                    new JObject(new JProperty("Raven-View-Template", "/raven/JSONTemplates/documentation.html")))
                ));

            AddDocumentsFromLinks(array, crawled, layout.SelectNodes(".//a"));

            File.WriteAllText(args[0], array.ToString(Formatting.Indented));
        }
开发者ID:torkelo,项目名称:ravendb,代码行数:28,代码来源:Program.cs


示例2: Create

        /// <summary>
        /// Builds an <see cref="XmlReader"/> over an HTML string. The DTD is parsed from the
        /// "Html.dtd" manifest resource of the assembly that contains <c>SgmlReader</c>.
        /// </summary>
        /// <param name="baseUri">Base URI used for the DTD and set on the returned reader.</param>
        /// <param name="html">HTML markup to read.</param>
        /// <returns>A configured <c>SgmlReader</c> positioned before the first node.</returns>
        public static XmlReader Create(string baseUri, string html)
        {
            SgmlDtd htmlDtd;

            using (var dtdStream = typeof(SgmlReader).Assembly.GetManifestResourceStream("Html.dtd"))
            {
                var dtdText = new StreamReader(dtdStream);
                htmlDtd = SgmlDtd.Parse(new Uri(baseUri), "HTML", dtdText, null, null, null);
            }

            var sgml = new SgmlReader();
            sgml.WhitespaceHandling = WhitespaceHandling.All;
            sgml.CaseFolding = CaseFolding.ToLower;
            sgml.Dtd = htmlDtd;
            sgml.IgnoreDtd = true;
            sgml.InputStream = new StringReader(html);
            sgml.SetBaseUri(baseUri);

            return sgml;
        }
开发者ID:JesusPanDeVida,项目名称:VocabularioTeologiaBiblica,代码行数:25,代码来源:SgmlFactory.cs


示例3: ParseHtml

        /// <summary>
        /// Creates an <see cref="XmlDocument"/> from HTML content. The content is wrapped in a
        /// <c>&lt;root&gt;</c> element before it is read, so the returned document contains it.
        /// </summary>
        /// <param name="sContent">HTML markup to convert.</param>
        /// <returns>The parsed document, with whitespace preserved.</returns>
        public static XmlDocument ParseHtml(string sContent)
        {
            // "using" guarantees the readers and the writer are closed even when parsing throws;
            // previously they were only closed on the success path.
            using (StringReader sr = new StringReader("<root>" + sContent + "</root>"))
            using (SgmlReader reader = new SgmlReader())
            {
                reader.WhitespaceHandling = WhitespaceHandling.All;
                reader.CaseFolding = Sgml.CaseFolding.ToLower;
                reader.InputStream = sr;

                StringWriter sw = new StringWriter();
                using (XmlTextWriter w = new XmlTextWriter(sw))
                {
                    w.Formatting = Formatting.Indented;
                    w.WriteStartDocument();

                    // WriteNode advances the reader to the next sibling itself, so the loop
                    // only has to position the reader once and then test for EOF.
                    reader.Read();
                    while (!reader.EOF)
                    {
                        w.WriteNode(reader, true);
                    }
                    w.Flush();
                }

                // create document
                XmlDocument doc = new XmlDocument();
                doc.PreserveWhitespace = true;
                doc.XmlResolver = null;
                doc.LoadXml(sw.ToString());

                return doc;
            }
        }
开发者ID:Cabana,项目名称:CMSConverter,代码行数:33,代码来源:SgmlUtil.cs


示例4: Parser

 /// <summary>
 /// Creates the parser and its SGML reader, configured with the "HTML" doc type,
 /// all whitespace reported, and names folded to lower case.
 /// </summary>
 public Parser()
 {
     _sgmlReader = new SgmlReader
     {
         DocType = "HTML",
         WhitespaceHandling = WhitespaceHandling.All,
         CaseFolding = CaseFolding.ToLower
     };
 }
开发者ID:benogle,项目名称:html2markup,代码行数:7,代码来源:Parser.cs


示例5: Main

    /// <summary>
    /// Benchmark entry point: reads the file named by <c>args[0]</c> once, then parses it into
    /// an <see cref="XmlDocument"/> through <c>SgmlReader</c> <c>args[1]</c> times and prints
    /// the elapsed time in seconds.
    /// </summary>
    /// <param name="args">File name followed by the iteration count.</param>
    static void Main(string[] args)
    {
        if (args.Length < 2) {
            Console.WriteLine("Usage: BenchSgmlReader.exe filename iterations");
            return;
        }

        // Dispose the file reader even if reading fails.
        string text;
        using (var streamReader = new StreamReader(args[0])) {
            text = streamReader.ReadToEnd();
        }

        // The iteration count is machine input, so parse it culture-independently.
        int n = int.Parse(args[1], CultureInfo.InvariantCulture);

        // Stopwatch is a monotonic, high-resolution timer; DateTime.Now is coarse and is
        // affected by system clock adjustments, which distorts benchmark results.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < n; i++) {
            SgmlReader sgmlReader = new SgmlReader();
            sgmlReader.DocType = "HTML";
            sgmlReader.WhitespaceHandling = WhitespaceHandling.All;
            //sgmlReader.CaseFolding = Sgml.CaseFolding.ToLower;
            sgmlReader.InputStream = new StringReader(text);

            XmlDocument doc = new XmlDocument();
            doc.PreserveWhitespace = true;
            doc.XmlResolver = null;
            doc.Load(sgmlReader);
        }
        stopwatch.Stop();

        var duration = stopwatch.Elapsed;
        Console.WriteLine("{0} s", (duration.TotalMilliseconds / 1000.0).ToString(CultureInfo.InvariantCulture));
    }
开发者ID:FrameworkBy,项目名称:html-parsers-benchmark,代码行数:31,代码来源:BenchSgmlReader.cs


示例6: GetPv

        /// <summary>
        /// Logs in to Hatena with the credentials from app settings, downloads the counter page
        /// built from <c>CounterUrlBase</c>, and returns the number found in the first
        /// <c>strong</c> element under a <c>table</c> whose first attribute value is "totalcount".
        /// </summary>
        /// <param name="cid">Value substituted into the counter URL (presumably the counter id — confirm against <c>CounterUrlBase</c>).</param>
        /// <param name="date">Date substituted into the counter URL, formatted as <c>yyyy-MM-dd</c>.</param>
        /// <returns>The parsed count.</returns>
        public static int GetPv(int cid, DateTime date)
        {
            var hatenaId = ConfigurationManager.AppSettings["hatenaId"];
            var hatenaPassword = ConfigurationManager.AppSettings["hatenaPassword"];

            var wc = new CustomWebClient() { Encoding = Encoding.UTF8 };
            wc.Headers.Add("Content-Type", "application/x-www-form-urlencoded");

            // The login response body is not needed; the call is made for the client's session state.
            var data = string.Format(LoginParamBase, hatenaId, hatenaPassword);
            wc.UploadString("https://www.hatena.ne.jp/login", "POST", data);

            var url = string.Format(CounterUrlBase, hatenaId, cid, date.ToString("yyyy-MM-dd"));
            var res = wc.DownloadString(url);

            XDocument xml;
            using (var sgml = new SgmlReader() { IgnoreDtd = true })
            {
                sgml.InputStream = new StringReader(res);
                xml = XDocument.Load(sgml);
            }
            var ns = xml.Root.Name.Namespace;
            var count = xml.Descendants(ns + "table")
                // FirstAttribute is null for a <table> without attributes; skip those instead of
                // failing with a NullReferenceException before the wanted table is reached.
                .Where(x => x.FirstAttribute != null && x.FirstAttribute.Value == "totalcount")
                .Descendants(ns + "strong")
                .First().Value;
            return int.Parse(count);
        }
开发者ID:nakaji,项目名称:nakaji-api,代码行数:27,代码来源:HatenaCounterHelper.cs


示例7: GetWellFormedHTML

 /// <summary>
 /// Converts HTML into well-formed, indented XML using <c>SgmlReader</c>, optionally
 /// reducing it to the text values of the nodes selected by an XPath expression.
 /// </summary>
 /// <param name="html">HTML markup to convert.</param>
 /// <param name="xpathNavPath">
 /// XPath expression; when <c>null</c> the whole converted document is returned, otherwise
 /// the <c>Value</c> of every selected node followed by a newline.
 /// </param>
 /// <returns>
 /// The result with "\r" replaced by "\n" and "\n\n" replaced by "\n"; if any exception
 /// occurs, the exception's message is returned instead.
 /// </returns>
 public static string GetWellFormedHTML(string html, string xpathNavPath)
 {
     StringWriter sw = null;
     SgmlReader reader = null;
     XmlTextWriter writer = null;
     try
     {
         reader = new SgmlReader();
         reader.DocType = "HTML";
         reader.InputStream = new StringReader(html);
         sw = new StringWriter();
         writer = new XmlTextWriter(sw);
         writer.Formatting = Formatting.Indented;

         // WriteNode already advances the reader to the node that follows the copied one, so
         // the reader must only be advanced explicitly when the current node is skipped.
         // Calling Read() after every WriteNode would silently drop that following node.
         reader.Read();
         while (!reader.EOF)
         {
             if (reader.NodeType != XmlNodeType.Whitespace)
             {
                 writer.WriteNode(reader, true);
             }
             else
             {
                 reader.Read();
             }
         }
         writer.Flush();

         string result;
         if (xpathNavPath == null)
         {
             result = sw.ToString();
         }
         else
         {
             // Filter out nodes from HTML
             StringBuilder sb = new StringBuilder();
             XPathDocument doc = new XPathDocument(new StringReader(sw.ToString()));
             XPathNavigator nav = doc.CreateNavigator();
             XPathNodeIterator nodes = nav.Select(xpathNavPath);
             while (nodes.MoveNext())
             {
                 sb.Append(nodes.Current.Value + "\n");
             }
             result = sb.ToString();
         }

         result = result.Replace("\r", "\n");
         result = result.Replace("\n\n", "\n");
         return result;
     }
     catch (Exception exp)
     {
         // Callers receive the failure as text; this contract is kept from the original.
         return exp.Message;
     }
     finally
     {
         // Clean up on every path. Each reference may still be null if the failure happened
         // before it was assigned, so guard each one.
         if (writer != null)
         {
             writer.Close();
         }
         if (reader != null)
         {
             reader.Close();
         }
         if (sw != null)
         {
             sw.Close();
         }
     }
 }
开发者ID:drzo,项目名称:opensim4opencog,代码行数:59,代码来源:HttpUtil.cs


示例8: HtmlReader

        /// <summary>
        /// Constructor. Downloads the page at <paramref name="url"/> and parses it into
        /// <c>Html</c> using <c>SgmlReader</c>.
        /// </summary>
        /// <param name="url">URL to fetch.</param>
        /// <param name="follow">Whether robots.txt is consulted before fetching.</param>
        /// <param name="agent">User agent to send; when null the request's default is used.</param>
        /// <param name="encoding">
        /// Encoding used to decode the response; when null the response's character set is used,
        /// falling back to UTF-8 if that is missing or not recognised.
        /// </param>
        /// <exception cref="RobotsDisallowException">
        /// Thrown when <paramref name="follow"/> is true and <c>Robots.Parse</c> returns false for the URL.
        /// </exception>
        public HtmlReader(string url, bool follow = true, UserAgent agent = null, Encoding encoding = null)
        {
            // HTTP request
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
            // User agent
            if (agent != null)
                req.UserAgent = agent.ToString();
            // robots.txt
            Robots robots = (follow) ? Robots.Create(new Uri(url)) : null;
            if (robots != null) {
                if (!robots.Parse(url))
                    throw new RobotsDisallowException("Robots Disallow [" + url + "]");
                // CrawlDelay is multiplied by 1000 to get the sleep time in milliseconds.
                if (robots.CrawlDelay != 0)
                    System.Threading.Thread.Sleep(robots.CrawlDelay * 1000);
            }

            using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
            using (Stream stream = res.GetResponseStream()) {
                Encoding enc = ResolveEncoding(res, encoding);
                using (StreamReader reader = new StreamReader(stream, enc))
                using (SgmlReader sgml = new SgmlReader {
                    DocType = "HTML",
                    InputStream = reader,
                    CaseFolding = CaseFolding.ToLower,
                    IgnoreDtd = true
                }) {
                    Html = XDocument.Load(sgml, LoadOptions.None);
                    Uri = url;
                    Encoding = enc;
                }
            }
        }

        // Chooses the encoding for the response body: the caller's override when given,
        // otherwise the response's declared character set, otherwise UTF-8. The fallback
        // covers a missing charset and names Encoding.GetEncoding rejects, which previously
        // aborted the whole fetch with an ArgumentException.
        private static Encoding ResolveEncoding(HttpWebResponse res, Encoding encoding)
        {
            if (encoding != null)
                return encoding;

            string charset = res.CharacterSet;
            if (string.IsNullOrEmpty(charset))
                return System.Text.Encoding.UTF8;

            try {
                // Some servers quote the value (charset="utf-8"); strip quotes and whitespace.
                return System.Text.Encoding.GetEncoding(charset.Trim().Trim('"', '\''));
            } catch (System.ArgumentException) {
                return System.Text.Encoding.UTF8;
            }
        }
开发者ID:t-kojima,项目名称:WebScrapingLibrary,代码行数:38,代码来源:HtmlReader.cs


示例9: GetXmlFromHtmlString

 /// <summary>
 /// Feeds an HTML string to a <c>SgmlReader</c> and returns what the reader's
 /// <c>ReadOuterXml</c> produces for it.
 /// </summary>
 /// <param name="html">HTML markup to convert.</param>
 /// <returns>The XML text returned by <c>ReadOuterXml</c>.</returns>
 public static String GetXmlFromHtmlString (String html)
 {
     using (SgmlReader sgml = new SgmlReader())
     {
         var source = new StringReader(html);
         sgml.InputStream = source;

         String xml = sgml.ReadOuterXml();
         return xml;
     }
 }
开发者ID:xxjeng,项目名称:nuxleus,代码行数:8,代码来源:HttpSgmlToXml.cs


示例10: ParseHtml

 /// <summary>
 /// Parses HTML from a text reader into an <see cref="XDocument"/>, with the "HTML" doc type
 /// and names folded to lower case.
 /// </summary>
 /// <param name="reader">Source of the HTML markup.</param>
 /// <returns>The loaded document.</returns>
 static XDocument ParseHtml(TextReader reader)
 {
     using (var sgml = new SgmlReader())
     {
         sgml.DocType = "HTML";
         sgml.CaseFolding = CaseFolding.ToLower;
         sgml.InputStream = reader;

         XDocument document = XDocument.Load(sgml);
         return document;
     }
 }
开发者ID:Rapids,项目名称:Verde,代码行数:8,代码来源:GlobalForm.cs


示例11: ParseHtml

		/// <summary>
		/// Loads HTML from <paramref name="_Reader"/> into an <see cref="XDocument"/> through a
		/// <c>SgmlReader</c> set to the "HTML" doc type with lower-case name folding.
		/// </summary>
		/// <param name="_Reader">Source of the HTML markup.</param>
		/// <returns>The loaded document.</returns>
		private static XDocument ParseHtml( TextReader _Reader )
		{
			var sgml = new SgmlReader {
				DocType = "HTML",
				CaseFolding = CaseFolding.ToLower,
				InputStream = _Reader, };

			// The reader is disposed as soon as the document has been loaded.
			using ( sgml )
			{
				XDocument document = XDocument.Load( sgml );
				return document;
			}
		}
开发者ID:TatsuyaHoshina,项目名称:Tatsuya.IIDX,代码行数:10,代码来源:IIDXWeb.cs


示例12: SetUp

 /// <summary>
 /// Creates a fresh <c>SgmlReader</c> for the tests: lower-case name folding, "HTML" doc type,
 /// and no whitespace nodes.
 /// </summary>
 public void SetUp()
 {
     var reader = new SgmlReader();
     reader.CaseFolding = CaseFolding.ToLower;
     reader.DocType = "HTML";
     reader.WhitespaceHandling = WhitespaceHandling.None;

     _sgmlReader = reader;
 }
开发者ID:panuganti,项目名称:nreadability,代码行数:10,代码来源:SgmlReaderTests.cs


示例13: FetchXmlDocument

 /// <summary>
 /// Fetches the text at <paramref name="url"/> via <c>FetchWebText</c> and loads it into an
 /// <see cref="XmlDocument"/> through <c>SgmlReader</c>.
 /// </summary>
 /// <param name="url">Address passed to <c>FetchWebText</c>.</param>
 /// <returns>The loaded document.</returns>
 XmlDocument FetchXmlDocument(Uri url)
 {
     // "using" closes both readers even when Load throws; previously a parse failure
     // left them open.
     using (var sr = FetchWebText (url))
     using (var xr = new SgmlReader () { InputStream = sr })
     {
         var doc = new XmlDocument ();
         doc.Load (xr);
         return doc;
     }
 }
开发者ID:atsushieno,项目名称:monodroid-schema-gen,代码行数:10,代码来源:type-hierarchy-importer.cs


示例14: FetchHtmlFromUrlAsXDocument

 /// <summary>
 /// Downloads the resource at <paramref name="url"/> and parses it as HTML into an
 /// <see cref="XDocument"/>, with names folded to lower case.
 /// </summary>
 /// <param name="url">Address of the page to download.</param>
 /// <returns>The parsed document.</returns>
 public static XDocument FetchHtmlFromUrlAsXDocument(string url)
 {
     var webRequest = WebRequest.Create(url);

     // The response must be disposed as well as the stream reader; otherwise it is left
     // for the garbage collector to release.
     using (var response = webRequest.GetResponse())
     using (var reader = new StreamReader(response.GetResponseStream()))
     using (var sgml = new SgmlReader())
     {
         sgml.DocType = "HTML";
         sgml.CaseFolding = CaseFolding.ToLower;
         sgml.InputStream = reader;

         // XDocument.Load already returns a new document, so no extra copy is made.
         return XDocument.Load(sgml);
     }
 }
开发者ID:blanciq,项目名称:serialz,代码行数:12,代码来源:RequestHelper.cs


示例15: FindImgs

        /// <summary>
        /// Scans HTML for <c>img</c> elements and collects their <c>src</c>, <c>width</c> and
        /// <c>height</c> attributes. Images without a non-empty <c>src</c> are left out.
        /// </summary>
        /// <param name="htmlCode">HTML markup to scan.</param>
        /// <returns>One <c>ImageInfo</c> per qualifying image, in document order.</returns>
        internal static ImageInfo[] FindImgs(
            string htmlCode)
        {
            var reader = new SgmlReader();
            reader.DocType = @"HTML";
            reader.InputStream = new StringReader(htmlCode);

            var images = new List<ImageInfo>();

            // Look for <img ...> elements that carry attributes.
            while (reader.Read())
            {
                var isImgElement =
                    reader.NodeType == XmlNodeType.Element &&
                    string.Equals(reader.Name, @"img", StringComparison.OrdinalIgnoreCase);

                if (!isImgElement || !reader.HasAttributes)
                {
                    continue;
                }

                var info = new ImageInfo();

                while (reader.MoveToNextAttribute())
                {
                    var attributeName = reader.Name.ToLowerInvariant();

                    if (attributeName == @"src")
                    {
                        info.Source = reader.Value;
                    }
                    else if (attributeName == @"width")
                    {
                        info.Width = ConvertHelper.ToInt32(reader.Value);
                    }
                    else if (attributeName == @"height")
                    {
                        info.Height = ConvertHelper.ToInt32(reader.Value);
                    }
                }

                if (!string.IsNullOrEmpty(info.Source))
                {
                    images.Add(info);
                }
            }

            return images.ToArray();
        }
开发者ID:jorik041,项目名称:ZetaHtmlEditControl,代码行数:51,代码来源:HtmlConversionHelper.cs


示例16: CanParseResponseAsXML

        /// <summary>
        /// Verifies that the contents of "fixture.txt" can be loaded through <c>SgmlReader</c>
        /// into an <see cref="XDocument"/>.
        /// </summary>
        public void CanParseResponseAsXML()
        {
            // Open the fixture read-only: FileStream(path, FileMode.Open) asks for read/write
            // access, which fails when the fixture file is read-only.
            using (var inputReader = new StreamReader(new FileStream("fixture.txt", FileMode.Open, FileAccess.Read)))
            using (var reader = new SgmlReader())
            {
                reader.InputStream = inputReader;
                reader.CaseFolding = CaseFolding.ToLower;
                reader.DocType = "HTML";

                var document = new XDocument(XDocument.Load((XmlReader) reader));

                Assert.IsNotNull(document.ToString());
            }
        }
开发者ID:blanciq,项目名称:serialz,代码行数:14,代码来源:WikiParserTests.cs


示例17: LoadHtmlPageAsXMLInternal

        /// <summary>
        /// Sends an HTTP request to <paramref name="uri"/> and loads the HTML response into an
        /// <see cref="XmlDocument"/> through <c>SgmlReader</c>.
        /// </summary>
        /// <param name="postData">Form data written as the request body when the method is POST; null is sent as an empty body.</param>
        /// <param name="uri">Address to request.</param>
        /// <param name="httpMethod">HTTP method; "POST" (any casing) sends <paramref name="postData"/>.</param>
        /// <returns>The response parsed as XML.</returns>
        /// <exception cref="NotSupportedException">Thrown when the proxy settings ask for IE settings.</exception>
        private XmlDocument LoadHtmlPageAsXMLInternal(string postData, string uri, string httpMethod)
        {
            // Prepare web request...
            HttpWebRequest webrequest = (HttpWebRequest)WebRequest.Create(uri);

            // Deal with proxy details if any.
            if (_proxySettings.Option == ProxySettingsDTO.ProxyOption.UseIESettings)
            {
                throw new NotSupportedException("IE proxy settings are not supported by this module!");
            }
            if (_proxySettings.Option == ProxySettingsDTO.ProxyOption.Custom)
            {
                webrequest.Proxy = new WebProxy(_proxySettings.ProxyHost, _proxySettings.ProxyPort);
            }
            webrequest.Method = httpMethod;

            if (String.Equals(httpMethod, "POST", StringComparison.OrdinalIgnoreCase))
            {
                // A null body would make GetBytes throw; treat it as an empty form.
                byte[] data = System.Text.Encoding.ASCII.GetBytes(postData ?? String.Empty);

                webrequest.ContentType = "application/x-www-form-urlencoded";
                webrequest.ContentLength = data.Length;

                using (Stream newStream = webrequest.GetRequestStream())
                {
                    newStream.Write(data, 0, data.Length);
                }
            }

            // Read the whole body first so the response can be released before parsing.
            // "using" releases the response and reader even when reading throws.
            string html;
            using (HttpWebResponse webresponse = (HttpWebResponse)webrequest.GetResponse())
            using (StreamReader responseReader = new StreamReader(
                webresponse.GetResponseStream(), System.Text.Encoding.GetEncoding(1252)))
            {
                // The body is always decoded as code page 1252, as in the original code.
                html = responseReader.ReadToEnd();
            }

            // Use the sgml reader to interpret the HTML as XML.
            using (StringReader stringReader = new StringReader(html))
            using (SgmlReader sgmlReader = new SgmlReader())
            {
                sgmlReader.DocType = "HTML";
                sgmlReader.InputStream = stringReader;

                XmlDocument doc = new XmlDocument();
                doc.Load(sgmlReader);
                return doc;
            }
        }
开发者ID:petegee,项目名称:AHPilotStats,代码行数:50,代码来源:HttpToXMLLoader.cs


示例18: getSessionId

 // Fetches (refreshes) the session ID: parses the HTML in the stream as UTF-8 and stores the
 // "value" attribute of the first <input> whose id is "com.sun.faces.VIEW" in session_id.
 // session_id becomes null when no such input exists or when it has no "value" attribute.
 void getSessionId(Stream stream)
 {
     var enc = System.Text.Encoding.UTF8;
     using (var reader = new StreamReader(stream, enc))
     using (var sgmlReader = new SgmlReader { InputStream = reader })
     {
         sgmlReader.DocType = "HTML";
         sgmlReader.CaseFolding = CaseFolding.ToLower;
         var doc = XDocument.Load(sgmlReader);
         var ns = doc.Root.Name.Namespace;

         // The explicit XAttribute-to-string conversion yields null for a missing attribute,
         // so an input without "id" or "value" no longer causes a NullReferenceException.
         session_id = doc.Descendants(ns + "input")
             .Where(el => (string)el.Attribute("id") == "com.sun.faces.VIEW")
             .Select(el => (string)el.Attribute("value"))
             .FirstOrDefault();
     }
 }
开发者ID:nullpoo,项目名称:UNIPA_HACK,代码行数:17,代码来源:MainWindow.xaml.cs


示例19: BuildDocument

        /// <summary>
        /// Constructs a DOM (System.Xml.Linq.XDocument) from HTML markup.
        /// </summary>
        /// <param name="htmlContent">HTML markup from which the DOM is to be constructed.</param>
        /// <returns>
        /// System.Xml.Linq.XDocument instance which is a DOM of the provided HTML markup;
        /// an empty document when the markup is empty or whitespace only.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="htmlContent"/> is null.</exception>
        public XDocument BuildDocument(string htmlContent)
        {
            if (htmlContent == null)
            {
                throw new ArgumentNullException("htmlContent");
            }

            if (htmlContent.Trim().Length == 0)
            {
                return new XDocument();
            }

            // "trim end" htmlContent to ...</html>$ (codinghorror.com puts some scripts after the </html> - sic!)
            // The search is ordinal and case-insensitive so that "</HTML>" is found as well and
            // the result does not depend on the current culture.
            const string htmlEnd = "</html";
            int indexOfHtmlEnd = htmlContent.LastIndexOf(htmlEnd, StringComparison.OrdinalIgnoreCase);

            if (indexOfHtmlEnd != -1)
            {
                int indexOfHtmlEndBracket = htmlContent.IndexOf('>', indexOfHtmlEnd);

                if (indexOfHtmlEndBracket != -1)
                {
                    htmlContent = htmlContent.Substring(0, indexOfHtmlEndBracket + 1);
                }
            }

            // load the document using sgml reader
            using (var sgmlReader = new SgmlReader())
            {
                sgmlReader.CaseFolding = CaseFolding.ToLower;
                sgmlReader.DocType = "HTML";
                sgmlReader.WhitespaceHandling = WhitespaceHandling.None;

                // The markup is fed to the reader as a UTF-8 byte stream, as in the original code.
                using (var sr = new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(htmlContent))))
                {
                    sgmlReader.InputStream = sr;

                    var document = XDocument.Load(sgmlReader);

                    return document;
                }
            }
        }
开发者ID:panuganti,项目名称:nreadability,代码行数:48,代码来源:SgmlDomBuilder.cs


示例20: ReadHtmlAsXhtml

 /// <summary>
 /// Parses markup into an <see cref="XElement"/>. Markup that mentions the XHTML namespace
 /// within its first 200 characters is parsed directly as XML; anything else is read as
 /// HTML through <c>SgmlReader</c>.
 /// </summary>
 /// <param name="html">XHTML or HTML markup.</param>
 /// <returns>The root element of the parsed markup.</returns>
 public static XElement ReadHtmlAsXhtml(string html)
 {
   //detect if xhtml by looking for namespace near start
   // IndexOf returns -1 when the namespace is absent; "-1 < 200" is true, so the index must
   // also be checked for >= 0 or plain HTML is wrongly sent to the XML parser.
   int namespaceIndex = html.IndexOf("http://www.w3.org/1999/xhtml", System.StringComparison.Ordinal);
   if (namespaceIndex >= 0 && namespaceIndex < 200)
   {
     //must be xhtml, so just parse as xml
     return XElement.Parse(html);
   }
   else
   {
     //probably html, so parse as sgml
     SgmlReader sgml = new SgmlReader();
     sgml.DocType = "HTML";
     sgml.WhitespaceHandling = WhitespaceHandling.All;
     sgml.CaseFolding = Sgml.CaseFolding.ToLower;
     sgml.InputStream = new StringReader(html);
     return XElement.Load(sgml);
   }
 }
开发者ID:erikzaadi,项目名称:atomsitethemes.erikzaadi.com,代码行数:19,代码来源:WebHelper.cs



注:本文中的Sgml.SgmlReader类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
C# Controller.ShadowsocksController类代码示例发布时间:2022-05-26
下一篇:
C# Sgml.Node类代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap