using System;
using System.IO;
using System.Net;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using System.Text.RegularExpressions;
namespace AniNIX.Shared {
public static class WebPageAPI {
    // Thanks to MSDN for this regex.
    // https://msdn.microsoft.com/en-us/library/ms998267.aspx
    // NOTE(review): the port group "(:(0-9)*)*" matches the literal text "0-9" rather than
    // digits. URLs with a port still match in full only because the trailing path character
    // class also accepts digits. The pattern is left unchanged so existing callers see the
    // same matches.
    public static Regex URLRegEx = new Regex(@"(ht|f)tp(s?)\:\/\/[0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*(:(0-9)*)*(\/?)([a-zA-Z0-9\-\.\?\,\'\/\\\+&%\$#_]*)?");

    // Captures the text between <title ...> and </title>, ignoring case and surrounding
    // whitespace. Should be equivalent to:
    //   perl -l -0777 -ne "print \$1 if /<title.*?>\s*(.*?)\s*<\/title/si"
    private static readonly Regex TitleRegEx = new Regex(
        @"<title\b[^>]*>\s*(?<Title>[\s\S]*?)\s*</title>",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Get a webpage source -- we use this instead of WebClient because Mono doesn't handle SSL well on Linux.
    /// Should be equivalent to:
    /// /usr/bin/curl -s --max-time 5 'SOMEURL'
    /// </summary>
    /// <param name="pageURL">the webpage whose source we should get</param>
    /// <returns>
    /// the webpage source as returned by ExecuteCommand.Run, or an empty string when
    /// <paramref name="pageURL"/> cannot be passed safely on a command line
    /// </returns>
    public static string GetPage(String pageURL) {
        string safeURL = EscapeURLForShell(pageURL);
        if (safeURL == null) {
            return String.Empty;
        }
        // The URL comes from the caller and is placed on a command line, so it is wrapped in
        // single quotes to keep characters such as & ; | $ ( ) from being interpreted.
        // NOTE(review): this assumes ExecuteCommand.Run hands the string to a POSIX shell
        // (e.g. /bin/bash -c) -- confirm against ExecuteCommand before merging.
        return ExecuteCommand.Run(String.Format("/usr/bin/curl -s --max-time 5 '{0}'", safeURL));
    }

    /// <summary>
    /// Get a webpage title: the text between the first &lt;title&gt; tag and its closing tag,
    /// with leading and trailing whitespace removed.
    /// </summary>
    /// <param name="pageURL">the webpage whose title we should get</param>
    /// <returns>the webpage title, or an empty string when the page has no title or could not be fetched</returns>
    public static string GetPageTitle(String pageURL) {
        string source = GetPage(pageURL);
        // Regex.Match throws on null input; treat "nothing fetched" as "no title".
        if (String.IsNullOrEmpty(source)) {
            return String.Empty;
        }
        // Groups["Title"].Value is the empty string when the pattern does not match.
        return TitleRegEx.Match(source).Groups["Title"].Value;
    }

    // Returns pageURL in a form that can sit inside single quotes on a command line,
    // or null when it should not be passed to curl at all.
    private static string EscapeURLForShell(String pageURL) {
        // A leading '-' would be parsed by curl as an option instead of a URL.
        if (String.IsNullOrEmpty(pageURL) || pageURL[0] == '-') {
            return null;
        }
        foreach (char c in pageURL) {
            // Whitespace and control characters have no place in a URL and could split
            // the argument or the command line.
            if (Char.IsWhiteSpace(c) || Char.IsControl(c)) {
                return null;
            }
        }
        // Percent-encode the characters that could end or escape the quoting.
        return pageURL
            .Replace("'", "%27")
            .Replace("\"", "%22")
            .Replace("\\", "%5C")
            .Replace("`", "%60");
    }
}
}