using System;
using System.IO;
using System.Net;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using System.Text.RegularExpressions;

namespace AniNIX.Shared {

    /// <summary>
    /// Helpers for recognising URLs in text and for fetching a web page (or just its title)
    /// by shelling out to /usr/bin/curl.
    /// </summary>
    public static class WebPageAPI {

        // Thanks to MSDN for this regex.
        // https://msdn.microsoft.com/en-us/library/ms998267.aspx
        // The port group was previously written "(0-9)", which matches the literal text "0-9"
        // rather than a digit; it is now the character class "[0-9]". The number and order of
        // capture groups is unchanged.
        // Left as a mutable public field so existing callers keep compiling.
        public static Regex URLRegEx = new Regex(@"(ht|f)tp(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:([0-9])*)*(\/?)([a-zA-Z0-9\-\.\?\,\'\/\\\+&%\$#_]*)?");

        // Matches the first <title ...>...</title> element and captures its text, without the
        // surrounding whitespace, in the named group "Title". Built once instead of per call.
        private static readonly Regex TitleRegEx = new Regex(
            @"<title\b[^>]*>\s*(?<Title>[\s\S]*?)\s*</title>",
            RegexOptions.IgnoreCase);

        /// <summary>
        /// Get a webpage source -- we use this instead of WebClient because Mono doesn't handle SSL well on Linux.
        /// Should be equivalent to:
        /// <c>/usr/bin/curl -s --max-time 5 SOMEURL</c>
        /// </summary>
        /// <param name="pageURL">the webpage whose source we should get; must be an absolute http, https or ftp URL</param>
        /// <returns>
        /// whatever ExecuteCommand.Run returns for the curl invocation, or an empty string when
        /// <paramref name="pageURL"/> is null, empty, not an absolute URL, or uses another scheme
        /// </returns>
        public static string GetPage(String pageURL) {
            Uri target;
            if (!TryParseFetchableUri(pageURL, out target)) {
                // Never hand an unvalidated string to the command line.
                return String.Empty;
            }

            // NOTE(review): ExecuteCommand.Run is defined outside this file; the quoting below
            // assumes it passes the string to a POSIX shell (as the bash examples in these
            // comments suggest) -- confirm against its implementation.
            string command = String.Format("/usr/bin/curl -s --max-time 5 {0}", QuoteForShell(target.AbsoluteUri));
            return ExecuteCommand.Run(command);
        }

        /// <summary>
        /// Get a webpage title
        /// Should be equivalent to:
        /// <c>/bin/bash -c '/usr/bin/curl -s SOMEURL | perl -l -0777 -ne "print \$1 if /&lt;title.*?&gt;\s*(.*?)\s*&lt;\/title/si"'</c>
        /// </summary>
        /// <param name="pageURL">the webpage whose title we should get</param>
        /// <returns>the webpage title, or an empty string when the page is empty or has no title element</returns>
        public static string GetPageTitle(String pageURL) {
            string source = GetPage(pageURL);
            if (String.IsNullOrEmpty(source)) {
                // Regex.Match throws on null input; an empty page has no title either way.
                return String.Empty;
            }

            // A failed match yields an empty group, so Value is "" when no title is present.
            return TitleRegEx.Match(source).Groups["Title"].Value;
        }

        /// <summary>
        /// Parse a caller-supplied string into an absolute URI and accept it only when its
        /// scheme is one of those URLRegEx recognises (http, https, ftp).
        /// </summary>
        /// <param name="candidate">the raw URL text</param>
        /// <param name="parsed">the parsed URI on success; null otherwise</param>
        /// <returns>true when the text is an absolute http, https or ftp URL</returns>
        private static bool TryParseFetchableUri(string candidate, out Uri parsed) {
            parsed = null;
            if (String.IsNullOrEmpty(candidate)) {
                return false;
            }

            Uri uri;
            if (!Uri.TryCreate(candidate, UriKind.Absolute, out uri)) {
                return false;
            }

            // Uri.Scheme is already lower-cased, so ordinal comparison is sufficient.
            bool allowed = String.Equals(uri.Scheme, Uri.UriSchemeHttp, StringComparison.Ordinal)
                        || String.Equals(uri.Scheme, Uri.UriSchemeHttps, StringComparison.Ordinal)
                        || String.Equals(uri.Scheme, Uri.UriSchemeFtp, StringComparison.Ordinal);
            if (!allowed) {
                // Rejects file:// and anything else curl would otherwise happily read.
                return false;
            }

            parsed = uri;
            return true;
        }

        /// <summary>
        /// Wrap a URL in single quotes for use as one shell word, percent-encoding the
        /// characters that could end the quoting or trigger expansion.
        /// </summary>
        /// <param name="url">an absolute URL</param>
        /// <returns>the single-quoted, encoded URL</returns>
        private static string QuoteForShell(string url) {
            // None of the replacement texts contain a character replaced by a later step,
            // so the order of these calls does not matter.
            string encoded = url
                .Replace("\\", "%5C")
                .Replace("'", "%27")
                .Replace("\"", "%22")
                .Replace("`", "%60")
                .Replace("$", "%24");
            return "'" + encoded + "'";
        }
    }
}