From affc7961522e6e02c4cff77c75cf0a1e6efb7693 Mon Sep 17 00:00:00 2001 From: bcrusu Date: Thu, 15 Oct 2015 10:01:17 +0300 Subject: [PATCH 01/14] added C# implementation --- csharp/.gitignore | 4 + csharp/Rendler.sln | 22 ++ csharp/Rendler/DotHelper.cs | 65 ++++ csharp/Rendler/Executors/CrawlExecutor.cs | 93 ++++++ csharp/Rendler/Executors/ExecutorBase.cs | 42 +++ .../Executors/Messages/CrawlResultMessage.cs | 14 + csharp/Rendler/Executors/Messages/Message.cs | 14 + .../Executors/Messages/RenderResultMessage.cs | 14 + csharp/Rendler/Executors/RenderExecutor.cs | 77 +++++ csharp/Rendler/JsonHelper.cs | 27 ++ csharp/Rendler/MesosExtensions.cs | 44 +++ csharp/Rendler/Program.cs | 71 +++++ csharp/Rendler/ProgramArguments.cs | 144 +++++++++ csharp/Rendler/Properties/AssemblyInfo.cs | 36 +++ csharp/Rendler/Rendler.csproj | 97 ++++++ csharp/Rendler/RendlerScheduler.cs | 291 ++++++++++++++++++ csharp/Rendler/RunMode.cs | 9 + csharp/Rendler/packages.config | 4 + 18 files changed, 1068 insertions(+) create mode 100644 csharp/.gitignore create mode 100644 csharp/Rendler.sln create mode 100644 csharp/Rendler/DotHelper.cs create mode 100644 csharp/Rendler/Executors/CrawlExecutor.cs create mode 100644 csharp/Rendler/Executors/ExecutorBase.cs create mode 100644 csharp/Rendler/Executors/Messages/CrawlResultMessage.cs create mode 100644 csharp/Rendler/Executors/Messages/Message.cs create mode 100644 csharp/Rendler/Executors/Messages/RenderResultMessage.cs create mode 100644 csharp/Rendler/Executors/RenderExecutor.cs create mode 100644 csharp/Rendler/JsonHelper.cs create mode 100644 csharp/Rendler/MesosExtensions.cs create mode 100644 csharp/Rendler/Program.cs create mode 100644 csharp/Rendler/ProgramArguments.cs create mode 100644 csharp/Rendler/Properties/AssemblyInfo.cs create mode 100644 csharp/Rendler/Rendler.csproj create mode 100644 csharp/Rendler/RendlerScheduler.cs create mode 100644 csharp/Rendler/RunMode.cs create mode 100644 csharp/Rendler/packages.config diff --git 
a/csharp/.gitignore b/csharp/.gitignore new file mode 100644 index 0000000..4127483 --- /dev/null +++ b/csharp/.gitignore @@ -0,0 +1,4 @@ +.vs +packages +Rendler/bin +Rendler/obj \ No newline at end of file diff --git a/csharp/Rendler.sln b/csharp/Rendler.sln new file mode 100644 index 0000000..1466f8f --- /dev/null +++ b/csharp/Rendler.sln @@ -0,0 +1,22 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.23107.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Rendler", "Rendler\Rendler.csproj", "{493B8B8C-97CF-4C2C-9276-E553C0CB5E88}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {493B8B8C-97CF-4C2C-9276-E553C0CB5E88}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {493B8B8C-97CF-4C2C-9276-E553C0CB5E88}.Debug|Any CPU.Build.0 = Debug|Any CPU + {493B8B8C-97CF-4C2C-9276-E553C0CB5E88}.Release|Any CPU.ActiveCfg = Release|Any CPU + {493B8B8C-97CF-4C2C-9276-E553C0CB5E88}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/csharp/Rendler/DotHelper.cs b/csharp/Rendler/DotHelper.cs new file mode 100644 index 0000000..fc18850 --- /dev/null +++ b/csharp/Rendler/DotHelper.cs @@ -0,0 +1,65 @@ +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace Rendler +{ + internal static class DotHelper + { + public static void Write(string outputPath, IDictionary> nodeToChildNodes, + IDictionary nodeImageFileName) + { + var nodeNames = new Dictionary(); + var nodeIdCounter = 0; + + using (var fs = new FileStream(outputPath, FileMode.CreateNew, FileAccess.Write, FileShare.Write)) + using (var writer = new StreamWriter(fs, Encoding.UTF8)) + { + 
writer.WriteLine("digraph G {"); + writer.WriteLine("\tnode [shape=box];"); + + foreach (var node in nodeToChildNodes) + { + var url = node.Key; + var nodeName = "url_" + (++nodeIdCounter); + nodeNames[url] = nodeName; + + writer.Write("\t"); + writer.Write(nodeName); + + string imageFileName; + if (nodeImageFileName.TryGetValue(url, out imageFileName)) + { + writer.Write(" [label=\"\" image=\""); + writer.Write(imageFileName); + } + else + { + writer.Write(" [label=\""); + writer.Write(url); + } + + writer.WriteLine("\"];"); + } + + writer.WriteLine(); + + foreach (var node in nodeToChildNodes) + { + var nodeName = nodeNames[node.Key]; + foreach (var childNode in node.Value) + { + var childNodeName = nodeNames[childNode]; + writer.Write("\t"); + writer.Write(nodeName); + writer.Write(" -> "); + writer.Write(childNodeName); + writer.WriteLine(";"); + } + } + + writer.WriteLine("}"); + } + } + } +} diff --git a/csharp/Rendler/Executors/CrawlExecutor.cs b/csharp/Rendler/Executors/CrawlExecutor.cs new file mode 100644 index 0000000..8562706 --- /dev/null +++ b/csharp/Rendler/Executors/CrawlExecutor.cs @@ -0,0 +1,93 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net; +using System.Text; +using System.Text.RegularExpressions; +using System.Threading.Tasks; +using mesos; +using mesosclr; +using Rendler.Executors.Messages; + +namespace Rendler.Executors +{ + internal class CrawlExecutor : ExecutorBase + { + private static readonly Regex ExtractLinksRegex = new Regex ("]+href=[\"']?(?[^\"'>]+)[\"']?[^>]*>(.+?)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + public override void Registered (IExecutorDriver driver, ExecutorInfo executorInfo, FrameworkInfo frameworkInfo, SlaveInfo slaveInfo) + { + Console.WriteLine ($"Registered executor on '{slaveInfo.hostname}'."); + } + + public override void LaunchTask (IExecutorDriver driver, TaskInfo taskInfo) + { + Console.WriteLine ($"Launching crawl task '{taskInfo.task_id.value}'..."); 
+ + Task.Factory.StartNew (() => { + try { + RunTask (driver, taskInfo); + } catch (Exception e) { + Console.WriteLine ($"Exception during crawl operation: {e}"); + driver.SendTaskErrorStatus (taskInfo.task_id); + } + }); + } + + private static void RunTask (IExecutorDriver driver, TaskInfo taskInfo) + { + driver.SendTaskRunningStatus (taskInfo.task_id); + + var url = Encoding.UTF8.GetString (taskInfo.data); + + var htmlContent = GetUrlContent (url); + if (htmlContent != null) { + var links = ExtractLinks (htmlContent); + links = links + .Select (x => x.ToLower ()) + .Distinct (StringComparer.CurrentCultureIgnoreCase); + + if (links.Any ()) + SendCrawlResultMessage (driver, url, links.ToArray ()); + } + + driver.SendTaskFinishedStatus (taskInfo.task_id); + } + + private static IEnumerable ExtractLinks (string htmlContent) + { + var match = ExtractLinksRegex.Match (htmlContent); + while (match.Success) { + yield return match.Groups ["link"].Value.Trim (); + match = match.NextMatch (); + } + } + + private static string GetUrlContent (string url) + { + using (var client = new WebClient ()) { + client.Headers.Add ("X-PoweredBy: minions"); + + try { + return client.DownloadString (url); + } catch (WebException e) { + Console.WriteLine ($"Error fetching url '{url}'; Error: {e}"); + return null; + } + } + } + + private static void SendCrawlResultMessage (IExecutorDriver driver, string url, string[] links) + { + var message = new Message { + Type = "CrawlResult", + Body = JsonHelper.Serialize (new CrawlResultMessage { + Url = url, + Links = links + }) + }; + + driver.SendFrameworkMessage (JsonHelper.Serialize (message)); + } + } +} diff --git a/csharp/Rendler/Executors/ExecutorBase.cs b/csharp/Rendler/Executors/ExecutorBase.cs new file mode 100644 index 0000000..d8329fb --- /dev/null +++ b/csharp/Rendler/Executors/ExecutorBase.cs @@ -0,0 +1,42 @@ +using System; +using mesos; +using mesosclr; + +namespace Rendler.Executors +{ + abstract class ExecutorBase : IExecutor + { + 
public virtual void Registered(IExecutorDriver driver, ExecutorInfo executorInfo, FrameworkInfo frameworkInfo, SlaveInfo slaveInfo) + { + } + + public virtual void Reregistered(IExecutorDriver driver, SlaveInfo slaveInfo) + { + } + + public virtual void Disconnected(IExecutorDriver driver) + { + } + + public virtual void LaunchTask(IExecutorDriver driver, TaskInfo taskInfo) + { + } + + public virtual void KillTask(IExecutorDriver driver, TaskID taskId) + { + } + + public virtual void FrameworkMessage(IExecutorDriver driver, byte[] data) + { + } + + public virtual void Shutdown(IExecutorDriver driver) + { + } + + public virtual void Error(IExecutorDriver driver, string message) + { + Console.WriteLine($"Error: '{message}'."); + } + } +} diff --git a/csharp/Rendler/Executors/Messages/CrawlResultMessage.cs b/csharp/Rendler/Executors/Messages/CrawlResultMessage.cs new file mode 100644 index 0000000..bdafe35 --- /dev/null +++ b/csharp/Rendler/Executors/Messages/CrawlResultMessage.cs @@ -0,0 +1,14 @@ +using System.Runtime.Serialization; + +namespace Rendler.Executors.Messages +{ + [DataContract] + public class CrawlResultMessage + { + [DataMember] + public string Url { get; set; } + + [DataMember] + public string[] Links { get; set; } + } +} diff --git a/csharp/Rendler/Executors/Messages/Message.cs b/csharp/Rendler/Executors/Messages/Message.cs new file mode 100644 index 0000000..5bbe425 --- /dev/null +++ b/csharp/Rendler/Executors/Messages/Message.cs @@ -0,0 +1,14 @@ +using System.Runtime.Serialization; + +namespace Rendler.Executors.Messages +{ + [DataContract] + internal class Message + { + [DataMember] + public string Type { get; set; } + + [DataMember] + public byte[] Body { get; set; } + } +} diff --git a/csharp/Rendler/Executors/Messages/RenderResultMessage.cs b/csharp/Rendler/Executors/Messages/RenderResultMessage.cs new file mode 100644 index 0000000..a652fba --- /dev/null +++ b/csharp/Rendler/Executors/Messages/RenderResultMessage.cs @@ -0,0 +1,14 @@ +using 
System.Runtime.Serialization; + +namespace Rendler.Executors.Messages +{ + [DataContract] + public class RenderResultMessage + { + [DataMember] + public string Url { get; set; } + + [DataMember] + public string FileName { get; set; } + } +} diff --git a/csharp/Rendler/Executors/RenderExecutor.cs b/csharp/Rendler/Executors/RenderExecutor.cs new file mode 100644 index 0000000..976835e --- /dev/null +++ b/csharp/Rendler/Executors/RenderExecutor.cs @@ -0,0 +1,77 @@ +using System; +using System.Diagnostics; +using System.IO; +using System.Text; +using System.Threading.Tasks; +using mesos; +using mesosclr; +using Rendler.Executors.Messages; + +namespace Rendler.Executors +{ + class RenderExecutor : ExecutorBase + { + private string _outputDir; + + public override void Registered(IExecutorDriver driver, ExecutorInfo executorInfo, FrameworkInfo frameworkInfo, SlaveInfo slaveInfo) + { + _outputDir = Encoding.UTF8.GetString (executorInfo.data); + Console.WriteLine ($"Registered executor on host '{slaveInfo.hostname}'. 
Output dir is '{_outputDir}'."); + } + + public override void LaunchTask(IExecutorDriver driver, TaskInfo taskInfo) + { + Console.WriteLine($"Launching render task '{taskInfo.task_id.value}'..."); + + Task.Factory.StartNew (() => { + try { + RunTask (driver, taskInfo); + } catch (Exception e) { + Console.WriteLine ($"Exception during render operation: {e}"); + driver.SendTaskErrorStatus (taskInfo.task_id); + } + }); + } + + private void RunTask(IExecutorDriver driver, TaskInfo taskInfo) + { + driver.SendTaskRunningStatus(taskInfo.task_id); + + var url = Encoding.UTF8.GetString(taskInfo.data); + var imageFileName = RunRendering(taskInfo.task_id, url); + + SendRenderResultMessage(driver, url, imageFileName); + driver.SendTaskFinishedStatus(taskInfo.task_id); + } + + private string RunRendering(TaskID taskId, string url) + { + var imagePath = Path.Combine(_outputDir, $"{taskId.value}.png"); + + var startInfo = new ProcessStartInfo("phantomjs"); + startInfo.Arguments = $"render.js \"{url}\" \"{imagePath}\""; + startInfo.WindowStyle = ProcessWindowStyle.Hidden; + startInfo.UseShellExecute = false; + + var process = Process.Start(startInfo); + process.WaitForExit(); + + return imagePath; + } + + private static void SendRenderResultMessage(IExecutorDriver driver, string url, string fileName) + { + var message = new Message + { + Type = "RenderResult", + Body = JsonHelper.Serialize(new RenderResultMessage + { + Url = url, + FileName = fileName + }) + }; + + driver.SendFrameworkMessage(JsonHelper.Serialize(message)); + } + } +} diff --git a/csharp/Rendler/JsonHelper.cs b/csharp/Rendler/JsonHelper.cs new file mode 100644 index 0000000..8a18b7f --- /dev/null +++ b/csharp/Rendler/JsonHelper.cs @@ -0,0 +1,27 @@ +using System.IO; +using System.Runtime.Serialization.Json; + +namespace Rendler +{ + internal static class JsonHelper + { + public static byte[] Serialize(object obj) + { + var dcs = new DataContractJsonSerializer(obj.GetType()); + using (var ms = new MemoryStream()) + 
{ + dcs.WriteObject(ms, obj); + return ms.ToArray(); + } + } + + public static T Deserialize(byte[] bytes) + { + var dcs = new DataContractJsonSerializer(typeof(T)); + using (var ms = new MemoryStream(bytes)) + { + return (T)dcs.ReadObject(ms); + } + } + } +} diff --git a/csharp/Rendler/MesosExtensions.cs b/csharp/Rendler/MesosExtensions.cs new file mode 100644 index 0000000..bb1f8b0 --- /dev/null +++ b/csharp/Rendler/MesosExtensions.cs @@ -0,0 +1,44 @@ +using mesos; +using mesosclr; + +namespace Rendler +{ + internal static class MesosExtensions + { + public static void SendTaskRunningStatus(this IExecutorDriver driver, TaskID taskId) + { + driver.SendStatusUpdate(new TaskStatus + { + task_id = taskId, + state = TaskState.TASK_RUNNING + }); + } + + public static void SendTaskFinishedStatus(this IExecutorDriver driver, TaskID taskId) + { + driver.SendStatusUpdate(new TaskStatus + { + task_id = taskId, + state = TaskState.TASK_FINISHED + }); + } + + public static void SendTaskErrorStatus(this IExecutorDriver driver, TaskID taskId) + { + driver.SendStatusUpdate(new TaskStatus + { + task_id = taskId, + state = TaskState.TASK_ERROR + }); + } + + public static bool IsTerminal(this TaskState state) + { + return state == TaskState.TASK_FINISHED || + state == TaskState.TASK_FAILED || + state == TaskState.TASK_KILLED || + state == TaskState.TASK_LOST || + state == TaskState.TASK_ERROR; + } + } +} diff --git a/csharp/Rendler/Program.cs b/csharp/Rendler/Program.cs new file mode 100644 index 0000000..3832f4d --- /dev/null +++ b/csharp/Rendler/Program.cs @@ -0,0 +1,71 @@ +using System; +using mesos; +using mesosclr; +using Rendler.Executors; + +namespace Rendler +{ + class Program + { + static int Main(string[] args) + { + var arguments = Arguments.Parse (args); + if (arguments == null || !arguments.Validate ()) + return -1; + + switch (arguments.RunMode) { + case RunMode.Scheduler: + return RunScheduler (arguments.MesosMaster, arguments.StartUrl, arguments.OutputDir, 
arguments.RunAsUser); + case RunMode.Executor: + return RunExecutor (arguments.ExecutorName); + default: + return -1; + } + } + + private static int RunScheduler(string mesosMaster, string startUrl, string outputDir, string runAsUser) + { + var frameworkInfo = new FrameworkInfo { + id = new FrameworkID { + value = "Rendler" + }, + name = "Rendler (C#)", + failover_timeout = 5, //seconds + checkpoint = false, + user = runAsUser + }; + + var scheduler = new RendlerScheduler(startUrl ?? "https://mesosphere.com", outputDir, runAsUser); + var driver = new MesosSchedulerDriver(scheduler, frameworkInfo, mesosMaster); + + Console.WriteLine ("Running driver..."); + var result = driver.Run() == Status.DRIVER_STOPPED ? 0 : 1; + Console.WriteLine ($"Driver finished with status {result}."); + + return result; + } + + private static int RunExecutor(string executorName) + { + IExecutor executor; + + switch (executorName) + { + case "render": + executor = new RenderExecutor(); + break; + case "crawl": + executor = new CrawlExecutor(); + break; + default: + { + Console.WriteLine($"Invalid executor provided: '{executorName}'."); + return -2; + } + } + + var driver = new MesosExecutorDriver(executor); + return driver.Run() == Status.DRIVER_STOPPED ? 
0 : 1; + } + } +} diff --git a/csharp/Rendler/ProgramArguments.cs b/csharp/Rendler/ProgramArguments.cs new file mode 100644 index 0000000..f0b58ad --- /dev/null +++ b/csharp/Rendler/ProgramArguments.cs @@ -0,0 +1,144 @@ +using System; +using System.IO; + +namespace Rendler +{ + internal class Arguments + { + public RunMode RunMode { get; private set; } + + public string MesosMaster { get; private set; } + + public string ExecutorName { get; private set; } + + public string StartUrl { get; private set; } + + public string OutputDir { get; private set; } + + public string RunAsUser { get; private set; } + + public static Arguments Parse(string[] args) + { + var runMode = RunMode.Default; + string mesosMaster = null; + string executor = null; + string outputDir = null; + string startUrl = null; + string runAsUser = null; + + foreach (var arg in args) + { + if (arg.StartsWith("-executor=")) + { + if (runMode == RunMode.Executor) { + Console.WriteLine("Executor option can be specified only once."); + return null; + } + if (runMode == RunMode.Scheduler) + { + Console.WriteLine("Scheduler and Executor run modes are mutually exclusive."); + return null; + } + + executor = arg.Substring("-executor=".Length); + runMode = RunMode.Executor; + } + else if (arg.Equals("-scheduler")) + { + if (runMode == RunMode.Scheduler) { + Console.WriteLine("Scheduler option can be specified only once."); + return null; + } + if (runMode == RunMode.Executor) + { + Console.WriteLine("Scheduler and Executor run modes are mutually exclusive."); + return null; + } + + runMode = RunMode.Scheduler; + } + else if (arg.StartsWith("-master=")) + { + if (mesosMaster != null) { + Console.WriteLine("Mesos master option can be specified only once."); + return null; + } + + mesosMaster = arg.Substring("-master=".Length); + } + else if (arg.StartsWith("-output=")) + { + if (outputDir != null) { + Console.WriteLine("Output directory option can be specified only once."); + return null; + } + + outputDir = 
arg.Substring("-output=".Length); + } + else if (arg.StartsWith("-starturl=")) + { + if (startUrl != null) { + Console.WriteLine("Start URL option can be specified only once."); + return null; + } + + startUrl = arg.Substring("-starturl=".Length); + } + else if (arg.StartsWith("-user=")) + { + if (runAsUser != null) { /* duplicate check must test the -user value, not the start URL */ + Console.WriteLine("User option can be specified only once."); + return null; + } + + runAsUser = arg.Substring("-user=".Length); + } + else + { + Console.WriteLine($"Unknown argument detected: '{arg}'."); + } + } + + return new Arguments + { + RunMode = runMode, + ExecutorName = executor, + MesosMaster = mesosMaster, + OutputDir = outputDir, + StartUrl = startUrl, + RunAsUser = runAsUser + }; + } + + public bool Validate () + { + switch (RunMode) { + case RunMode.Executor: + if (string.IsNullOrWhiteSpace (ExecutorName)) { + Console.WriteLine ("Invalid executor name."); + return false; + } + break; + case RunMode.Scheduler: + if (string.IsNullOrWhiteSpace (MesosMaster)) { + Console.WriteLine ("Invalid Mesos master address."); + return false; + } + if (string.IsNullOrWhiteSpace (OutputDir)) { + Console.WriteLine ("Invalid output directory."); + return false; + } + if (!Directory.Exists(OutputDir)){ + Console.WriteLine ("Output directory does not exist."); + return false; + } + break; + default: + Console.WriteLine ("Run mode was not specified."); + return false; + } + + return true; + } + } +} diff --git a/csharp/Rendler/Properties/AssemblyInfo.cs b/csharp/Rendler/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..6315421 --- /dev/null +++ b/csharp/Rendler/Properties/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. 
+[assembly: AssemblyTitle("mesosclr.Rendler")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("IBM")] +[assembly: AssemblyProduct("mesosclr.Rendler")] +[assembly: AssemblyCopyright("Copyright © IBM 2015")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("493b8b8c-97cf-4c2c-9276-e553c0cb5e88")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/csharp/Rendler/Rendler.csproj b/csharp/Rendler/Rendler.csproj new file mode 100644 index 0000000..05252cf --- /dev/null +++ b/csharp/Rendler/Rendler.csproj @@ -0,0 +1,97 @@ + + + + + Debug + AnyCPU + {493B8B8C-97CF-4C2C-9276-E553C0CB5E88} + Exe + Properties + Rendler + rendler + v4.5 + 512 + true + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + -scheduler -master=127.0.0.50:5050 -user=bcrusu -output=/tmp/rendlerout + + + + + + + + + + + + + false + 6 + + + AnyCPU + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + 6 + + + + False + ..\..\..\mesos-clr\src\main\mesosclr\bin\Debug\mesosclr.dll + + + + + + + + ..\packages\protobuf-net.2.0.0.668\lib\net40\protobuf-net.dll + + + + + + + + + + + + + + + + + + + + + + + + render.js + PreserveNewest + + + + + \ No newline at end of file diff --git a/csharp/Rendler/RendlerScheduler.cs 
b/csharp/Rendler/RendlerScheduler.cs new file mode 100644 index 0000000..0dda1c7 --- /dev/null +++ b/csharp/Rendler/RendlerScheduler.cs @@ -0,0 +1,291 @@ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading; +using mesos; +using mesosclr; +using Rendler.Executors.Messages; + +namespace Rendler +{ + internal class RendlerScheduler : IScheduler + { + private const int MaxTasksToRun = 256; // limit for demonstration purpose + + private const double RenderCpus = 1d; + private const double RenderMem = 128d; + private const double CrawlCpus = 0.5d; + private const double CrawlMem = 64d; + + private readonly string _outputDir; + private readonly string _runAsUser; + + + private int _launchedTasks; + private int _finishedTasksCount; + private readonly ConcurrentQueue _crawlQueue = new ConcurrentQueue(); + private readonly ConcurrentQueue _renderQueue = new ConcurrentQueue(); + private readonly ISet _crawled = new HashSet(); + + private readonly ConcurrentDictionary _urlToFileMap = new ConcurrentDictionary(); + private readonly ConcurrentDictionary> _edgesMap = new ConcurrentDictionary>(); + + public RendlerScheduler(string startUrl, string outputDir, + string runAsUser = null) + { + if (startUrl == null) throw new ArgumentNullException(nameof(startUrl)); + if (outputDir == null) throw new ArgumentNullException(nameof(outputDir)); + _outputDir = outputDir; + _runAsUser = runAsUser; + + _crawlQueue.Enqueue(startUrl); + _renderQueue.Enqueue(startUrl); + } + + public void Registered(ISchedulerDriver driver, FrameworkID frameworkId, MasterInfo masterInfo) + { + Console.WriteLine($"Registered with Mesos master. 
FrameworkId='{frameworkId.value}'."); + } + + public void Reregistered(ISchedulerDriver driver, MasterInfo masterInfo) + { + } + + public void ResourceOffers(ISchedulerDriver driver, IEnumerable offers) + { + foreach (var offer in offers) + { + var tasks = new List(); + var resourcesCounter = new ResourcesCounter(offer); + bool done; + do + { + done = true; + + string renderUrl; + if (resourcesCounter.HasRenderTaskResources() && _renderQueue.TryDequeue(out renderUrl)) + { + tasks.Add(GetRenderTaskInfo(offer, ++_launchedTasks, renderUrl)); + resourcesCounter.SubstractRenderResources(); + done = false; + } + + string crawlUrl; + if (resourcesCounter.HasCrawlTaskResources() && _crawlQueue.TryDequeue(out crawlUrl)) + { + tasks.Add(GetCrawlTaskInfo(offer, ++_launchedTasks, crawlUrl)); + resourcesCounter.SubstractCrawlResources(); + _crawled.Add(crawlUrl); + done = false; + } + } while (!done); + + if (tasks.Any ()) { + driver.LaunchTasks (new[] { offer.id }, tasks); + } + else + driver.DeclineOffer(offer.id); + } + } + + public void OfferRescinded(ISchedulerDriver driver, OfferID offerId) + { + } + + public void StatusUpdate(ISchedulerDriver driver, TaskStatus status) + { + if (status.state.IsTerminal()) + { + Console.WriteLine($"Status update: task '{status.task_id.value}' has terminated with state '{status.state}'."); + var finishedTasksCount = Interlocked.Increment(ref _finishedTasksCount); + + if (finishedTasksCount == MaxTasksToRun) + { + Console.WriteLine("Reached the max number of tasks to run. 
Stopping..."); + + var dotWritePath = Path.Combine(_outputDir, "result.dot"); + DotHelper.Write(dotWritePath, _edgesMap, _urlToFileMap); + driver.Stop(); + } + } + else + { + Console.WriteLine($"Status update: task '{status.task_id.value}' is in state '{status.state}'."); + } + } + + public void FrameworkMessage(ISchedulerDriver driver, ExecutorID executorId, SlaveID slaveId, byte[] data) + { + var message = JsonHelper.Deserialize (data); + switch (message.Type) { + case "CrawlResult": + var crawlResult = JsonHelper.Deserialize (message.Body); + Console.WriteLine ($"Framework message : got {crawlResult.Links.Length} links from url '{crawlResult.Url}'."); + + foreach (var link in crawlResult.Links) { + if (_crawled.Contains (link)) + continue; + + _crawlQueue.Enqueue (link); + _renderQueue.Enqueue (link); + } + + // update edges: url -> links + var edges = _edgesMap.GetOrAdd (crawlResult.Url, x => new List ()); + edges.AddRange (crawlResult.Links); + + // empty edge list for links + foreach (var link in crawlResult.Links) + _edgesMap.GetOrAdd (link, x => new List ()); + break; + case "RenderResult": + var renderResult = JsonHelper.Deserialize (message.Body); + Console.WriteLine ($"Framework message : saved '{renderResult.FileName}' for url '{renderResult.Url}'."); + + _urlToFileMap [renderResult.Url] = renderResult.FileName; + break; + default: + Console.WriteLine ($"Unrecognized message type: '{message.Type}'"); + break; + } + } + + public void Disconnected(ISchedulerDriver driver) + { + } + + public void SlaveLost(ISchedulerDriver driver, SlaveID slaveId) + { + } + + public void ExecutorLost(ISchedulerDriver driver, ExecutorID executorId, SlaveID slaveId, int status) + { + } + + public void Error(ISchedulerDriver driver, string message) + { + Console.WriteLine($"Error: '{message}'."); + } + + private TaskInfo GetRenderTaskInfo(Offer offer, int uniqueId, string url) + { + var result = new TaskInfo { + name = "Rendler.Render_" + uniqueId, + task_id = new TaskID { 
value = uniqueId.ToString () }, + slave_id = offer.slave_id, + resources = { + new Resource { + name = "cpus", + type = Value.Type.SCALAR, + scalar = new Value.Scalar { value = RenderCpus } + }, + new Resource { + name = "mem", + type = Value.Type.SCALAR, + scalar = new Value.Scalar { value = RenderMem } + } + }, + executor = new ExecutorInfo { + executor_id = new ExecutorID { value = "RenderExecutor" }, + command = new CommandInfo { + value = "mono rendler.exe -executor=render", + user = _runAsUser + }, + data = Encoding.UTF8.GetBytes (_outputDir) + }, + data = Encoding.UTF8.GetBytes (url) + }; + + result.executor.command.uris.Add (new CommandInfo.URI { + cache = false, + extract = true, + value = "./rendler.tar.gz", // relative to "frameworks_home" mesos-slave command argument + executable = false + }); + + return result; + } + + private TaskInfo GetCrawlTaskInfo(Offer offer, int uniqueId, string url) + { + var result = new TaskInfo { + name = "Rendler.Crawl_" + uniqueId, + task_id = new TaskID { value = uniqueId.ToString () }, + slave_id = offer.slave_id, + resources = { + new Resource { + name = "cpus", + type = Value.Type.SCALAR, + scalar = new Value.Scalar { value = CrawlCpus } + }, + new Resource { name = "mem", type = Value.Type.SCALAR, scalar = new Value.Scalar { value = CrawlMem } } + }, + executor = new ExecutorInfo { + executor_id = new ExecutorID { value = "CrawlExecutor" }, + command = new CommandInfo { + value = "mono rendler.exe -executor=crawl", + user = _runAsUser + }, + }, + data = Encoding.UTF8.GetBytes (url) + }; + + result.executor.command.uris.Add (new CommandInfo.URI { + cache = false, + extract = true, + value = "./rendler.tar.gz", // relative to "frameworks_home" mesos-slave command argument + executable = false + }); + + return result; + } + + private class ResourcesCounter + { + private double _cpus; + private double _mem; + + public ResourcesCounter(Offer offer) + { + var cpusResource = offer.resources.SingleOrDefault(x => x.name == 
"cpus"); + var memResource = offer.resources.SingleOrDefault(x => x.name == "mem"); + _cpus = cpusResource?.scalar.value ?? 0d; + _mem = memResource?.scalar.value ?? 0d; + } + + private void Substract(double cpus, double mem) + { + _cpus = _cpus - cpus; + _mem = _mem - mem; + } + + public bool HasRenderTaskResources() + { + return HasResources(RenderCpus, RenderMem); + } + + public bool HasCrawlTaskResources() + { + return HasResources(CrawlCpus, CrawlMem); + } + + public void SubstractRenderResources() + { + Substract(RenderCpus, RenderMem); + } + + public void SubstractCrawlResources() + { + Substract(CrawlCpus, CrawlMem); + } + + private bool HasResources(double cpus, double mem) + { + return _cpus >= cpus && _mem >= mem; + } + } + } +} diff --git a/csharp/Rendler/RunMode.cs b/csharp/Rendler/RunMode.cs new file mode 100644 index 0000000..ae89e3d --- /dev/null +++ b/csharp/Rendler/RunMode.cs @@ -0,0 +1,9 @@ +namespace Rendler +{ + internal enum RunMode + { + Default, + Scheduler, + Executor + } +} diff --git a/csharp/Rendler/packages.config b/csharp/Rendler/packages.config new file mode 100644 index 0000000..3c1695b --- /dev/null +++ b/csharp/Rendler/packages.config @@ -0,0 +1,4 @@ + + + + \ No newline at end of file From 7144340e99334acf5346306a8f0f966ce902746b Mon Sep 17 00:00:00 2001 From: bcrusu Date: Thu, 15 Oct 2015 10:53:49 +0300 Subject: [PATCH 02/14] cleanup --- README.md | 3 ++- csharp/.gitignore | 8 +++++++- csharp/Rendler/README.md | 4 ++++ csharp/Rendler/Rendler.csproj | 20 +++----------------- csharp/Rendler/RendlerScheduler.cs | 12 ++++++------ 5 files changed, 22 insertions(+), 25 deletions(-) create mode 100644 csharp/Rendler/README.md diff --git a/README.md b/README.md index ba1aeb5..8b217eb 100644 --- a/README.md +++ b/README.md @@ -39,12 +39,13 @@ You can see that 1 slave is registered and you've got some idle CPUs and Memory. 
### Run RENDLER in the `mesos-demo` VM Check implementations of the RENDLER scheduler in the `python`, `go`, -`scala`, and `cpp` directories. Run instructions are here: +`scala`, `cpp` and `csharp` directories. Run instructions are here: - [Python RENDLER framework](python/README.md) - [Go RENDLER framework](go/README.md) - [Scala RENDLER framework](scala/README.md) - [C++ RENDLER framework](cpp/README.md) +- [C# RENDLER framework](csharp/README.md) Feel free to contribute your own! diff --git a/csharp/.gitignore b/csharp/.gitignore index 4127483..94c52c4 100644 --- a/csharp/.gitignore +++ b/csharp/.gitignore @@ -1,4 +1,10 @@ +*.suo +*.user +*.sdf +*.userprefs .vs packages Rendler/bin -Rendler/obj \ No newline at end of file +Rendler/obj + + diff --git a/csharp/Rendler/README.md b/csharp/Rendler/README.md new file mode 100644 index 0000000..dcc8d30 --- /dev/null +++ b/csharp/Rendler/README.md @@ -0,0 +1,4 @@ +C# Rendler Framework +======== + +TODO diff --git a/csharp/Rendler/Rendler.csproj b/csharp/Rendler/Rendler.csproj index 05252cf..f3346e9 100644 --- a/csharp/Rendler/Rendler.csproj +++ b/csharp/Rendler/Rendler.csproj @@ -21,19 +21,6 @@ DEBUG;TRACE prompt 4 - -scheduler -master=127.0.0.50:5050 -user=bcrusu -output=/tmp/rendlerout - - - - - - - - - - - - false 6 @@ -48,10 +35,6 @@ 6 - - False - ..\..\..\mesos-clr\src\main\mesosclr\bin\Debug\mesosclr.dll - @@ -60,6 +43,9 @@ ..\packages\protobuf-net.2.0.0.668\lib\net40\protobuf-net.dll + + ..\..\..\mesos-clr\src\main\mesosclr\bin\Debug\mesosclr.dll + diff --git a/csharp/Rendler/RendlerScheduler.cs b/csharp/Rendler/RendlerScheduler.cs index 0dda1c7..0f14e0c 100644 --- a/csharp/Rendler/RendlerScheduler.cs +++ b/csharp/Rendler/RendlerScheduler.cs @@ -30,8 +30,8 @@ internal class RendlerScheduler : IScheduler private readonly ConcurrentQueue _renderQueue = new ConcurrentQueue(); private readonly ISet _crawled = new HashSet(); - private readonly ConcurrentDictionary _urlToFileMap = new ConcurrentDictionary(); - private 
readonly ConcurrentDictionary> _edgesMap = new ConcurrentDictionary>(); + private readonly ConcurrentDictionary _renderResults = new ConcurrentDictionary(); + private readonly ConcurrentDictionary> _crawlResults = new ConcurrentDictionary>(); public RendlerScheduler(string startUrl, string outputDir, string runAsUser = null) @@ -107,7 +107,7 @@ public void StatusUpdate(ISchedulerDriver driver, TaskStatus status) Console.WriteLine("Reached the max number of tasks to run. Stopping..."); var dotWritePath = Path.Combine(_outputDir, "result.dot"); - DotHelper.Write(dotWritePath, _edgesMap, _urlToFileMap); + DotHelper.Write(dotWritePath, _crawlResults, _renderResults); driver.Stop(); } } @@ -134,18 +134,18 @@ public void FrameworkMessage(ISchedulerDriver driver, ExecutorID executorId, Sla } // update edges: url -> links - var edges = _edgesMap.GetOrAdd (crawlResult.Url, x => new List ()); + var edges = _crawlResults.GetOrAdd (crawlResult.Url, x => new List ()); edges.AddRange (crawlResult.Links); // empty edge list for links foreach (var link in crawlResult.Links) - _edgesMap.GetOrAdd (link, x => new List ()); + _crawlResults.GetOrAdd (link, x => new List ()); break; case "RenderResult": var renderResult = JsonHelper.Deserialize (message.Body); Console.WriteLine ($"Framework message : saved '{renderResult.FileName}' for url '{renderResult.Url}'."); - _urlToFileMap [renderResult.Url] = renderResult.FileName; + _renderResults [renderResult.Url] = renderResult.FileName; break; default: Console.WriteLine ($"Unrecognized message type: '{message.Type}'"); From 1821096cc4883a3abe69c7e4d9b81f0737f86252 Mon Sep 17 00:00:00 2001 From: bcrusu Date: Thu, 15 Oct 2015 18:01:43 +0300 Subject: [PATCH 03/14] added readme file --- csharp/.gitignore | 3 ++- csharp/README.md | 15 +++++++++++++++ csharp/Rendler/README.md | 4 ---- csharp/Rendler/Rendler.csproj | 8 ++++++-- csharp/ext/readme | 3 +++ 5 files changed, 26 insertions(+), 7 deletions(-) create mode 100644 csharp/README.md delete 
mode 100644 csharp/Rendler/README.md create mode 100644 csharp/ext/readme diff --git a/csharp/.gitignore b/csharp/.gitignore index 94c52c4..3773b92 100644 --- a/csharp/.gitignore +++ b/csharp/.gitignore @@ -6,5 +6,6 @@ packages Rendler/bin Rendler/obj - +ext/mesosclr.dll +ext/libmesosclr.so diff --git a/csharp/README.md b/csharp/README.md new file mode 100644 index 0000000..41328dc --- /dev/null +++ b/csharp/README.md @@ -0,0 +1,15 @@ +## C# Rendler Framework + +### Preparation + +1. The implementation uses the [mesos-clr](https://github.com/bcrusu/mesos-clr) library which needs to be build and placed to the `ext` directory before building the Rendler project. +2. Pack the Rendler binaries to `rendler.tar.gz` and place the archive to the [frameworks_home](http://mesos.apache.org/documentation/latest/configuration/) directory. + +### Running + +To start the Rendler framework, run the command: +```bash +mono rendler.exe -scheduler -master=MASTER_ADDRESS -output=RENDLER_OUTPUT_DIR [-user=RUN_AS_USER] +``` + + diff --git a/csharp/Rendler/README.md b/csharp/Rendler/README.md deleted file mode 100644 index dcc8d30..0000000 --- a/csharp/Rendler/README.md +++ /dev/null @@ -1,4 +0,0 @@ -C# Rendler Framework -======== - -TODO diff --git a/csharp/Rendler/Rendler.csproj b/csharp/Rendler/Rendler.csproj index f3346e9..5e76707 100644 --- a/csharp/Rendler/Rendler.csproj +++ b/csharp/Rendler/Rendler.csproj @@ -44,7 +44,7 @@ ..\packages\protobuf-net.2.0.0.668\lib\net40\protobuf-net.dll - ..\..\..\mesos-clr\src\main\mesosclr\bin\Debug\mesosclr.dll + ..\ext\mesosclr.dll @@ -71,6 +71,10 @@ render.js PreserveNewest + + libmesosclr.so + PreserveNewest + - \ No newline at end of file + diff --git a/csharp/ext/readme b/csharp/ext/readme new file mode 100644 index 0000000..b78a6bd --- /dev/null +++ b/csharp/ext/readme @@ -0,0 +1,3 @@ +directory containing the mesos-clr binaries required to build Rendler +- mesosclr.dll +- libmesosclr.so From 03173e0377d2b53c58b9e2c4d9de0f1bacee6f6e Mon Sep 
17 00:00:00 2001 From: bcrusu Date: Thu, 15 Oct 2015 18:11:14 +0300 Subject: [PATCH 04/14] minor change to README file --- csharp/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/README.md b/csharp/README.md index 41328dc..40f1b33 100644 --- a/csharp/README.md +++ b/csharp/README.md @@ -9,7 +9,7 @@ To start the Rendler framework, run the command: ```bash -mono rendler.exe -scheduler -master=MASTER_ADDRESS -output=RENDLER_OUTPUT_DIR [-user=RUN_AS_USER] +mono rendler.exe -scheduler -master=MASTER_ADDRESS -output=RENDLER_OUTPUT_DIR [-starturl=CRAWL_START_URL] [-user=RUN_AS_USER] ``` From 5c5110c833d6fe935ec506351b73a60820aef15a Mon Sep 17 00:00:00 2001 From: bcrusu Date: Thu, 15 Oct 2015 19:50:05 +0300 Subject: [PATCH 05/14] Update README.md --- csharp/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/csharp/README.md b/csharp/README.md index 40f1b33..ccdb887 100644 --- a/csharp/README.md +++ b/csharp/README.md @@ -3,7 +3,7 @@ ### Preparation 1. The implementation uses the [mesos-clr](https://github.com/bcrusu/mesos-clr) library which needs to be build and placed to the `ext` directory before building the Rendler project. -2. Pack the Rendler binaries to `rendler.tar.gz` and place the archive to the [frameworks_home](http://mesos.apache.org/documentation/latest/configuration/) directory. +2. Pack the Rendler binaries to `rendler.tar.gz` and place the archive inside the [frameworks_home](http://mesos.apache.org/documentation/latest/configuration/) directory. 
### Running @@ -12,4 +12,3 @@ To start the Rendler framework, run the command: mono rendler.exe -scheduler -master=MASTER_ADDRESS -output=RENDLER_OUTPUT_DIR [-starturl=CRAWL_START_URL] [-user=RUN_AS_USER] ``` - From 8867c3938dcfcf680d8d149ea6adee3ae27512da Mon Sep 17 00:00:00 2001 From: bcrusu Date: Sat, 31 Oct 2015 20:11:44 +0200 Subject: [PATCH 06/14] Rendler nodejs implementation - initial commit --- nodejs/.gitignore | 3 ++ nodejs/index.js | 3 ++ nodejs/package.json | 11 ++++ nodejs/src/arguments.js | 117 ++++++++++++++++++++++++++++++++++++++++ nodejs/src/rendler.js | 29 ++++++++++ 5 files changed, 163 insertions(+) create mode 100644 nodejs/.gitignore create mode 100644 nodejs/index.js create mode 100644 nodejs/package.json create mode 100644 nodejs/src/arguments.js create mode 100644 nodejs/src/rendler.js diff --git a/nodejs/.gitignore b/nodejs/.gitignore new file mode 100644 index 0000000..1c0eb6e --- /dev/null +++ b/nodejs/.gitignore @@ -0,0 +1,3 @@ +/.idea +/node_modules +*.iml diff --git a/nodejs/index.js b/nodejs/index.js new file mode 100644 index 0000000..2dd84c1 --- /dev/null +++ b/nodejs/index.js @@ -0,0 +1,3 @@ +var rendler = require("./src/rendler.js"); + +rendler.main(); \ No newline at end of file diff --git a/nodejs/package.json b/nodejs/package.json new file mode 100644 index 0000000..3cdbd7a --- /dev/null +++ b/nodejs/package.json @@ -0,0 +1,11 @@ +{ + "name": "rendler", + "version": "0.0.1", + "private": true, + "scripts": { + "start": "node index.js" + }, + "dependencies": { + "mesosApi": ">0.0.0" + } +} \ No newline at end of file diff --git a/nodejs/src/arguments.js b/nodejs/src/arguments.js new file mode 100644 index 0000000..9ba352e --- /dev/null +++ b/nodejs/src/arguments.js @@ -0,0 +1,117 @@ +function parse(argsArray) { + var runMode = undefined; + var mesosMaster = undefined; + var executor = undefined; + var outputDir = undefined; + var startUrl = undefined; + var runAsUser = undefined; + + argsArray.forEach(function (arg) { + if 
(arg.indexOf("-executor=") === 0) { + if (runMode !== undefined) { + console.write("Invalid run mode detected. Check the 'executor' argument!"); + return null; + } + + executor = arg.substring("-executor=".length); + runMode = "executor"; + } + else if (arg.indexOf("-scheduler") === 0) { + if (runMode !== undefined) { + console.write("Invalid run mode detected. Check the 'scheduler' argument!"); + return null; + } + + runMode = "scheduler"; + } + else if (arg.indexOf("-master=") === 0) { + if (mesosMaster !== undefined) { + console.write("Mesos master option can be specified only once."); + return null; + } + + mesosMaster = arg.substring("-master=".length); + } + else if (arg.indexOf("-output=") === 0) { + if (outputDir !== undefined) { + console.write("Output directory option can be specified only once."); + return null; + } + + outputDir = arg.substring("-output=".length); + } + else if (arg.indexOf("-starturl=") === 0) { + if (startUrl !== undefined) { + console.write("Start URL option can be specified only once."); + return null; + } + + startUrl = arg.substring("-starturl=".length); + } + else if (arg.indexOf("-user=") === 0) { + if (runAsUser !== undefined) { + console.write("User option can be specified only once."); + return null; + } + + runAsUser = arg.substring("-user=".length); + } + }); + + return { + 'runMode': runMode, + 'mesosMaster': mesosMaster, + 'executorName': executor, + 'outputDir': outputDir, + 'startUrl': startUrl, + 'runAsUser': runAsUser + } +} + +function validate(arguments) { + switch (arguments.runMode) { + case "executor": + if (!arguments.executorName) { + console.write("Invalid executor name."); + return false; + } + break; + case "scheduler" : + if (!arguments.mesosMaster) { + console.write("Invalid Mesos master address."); + return false; + } + if (!arguments.outputDir) { + console.write("Invalid output directory."); + return false; + } + + if (!directoryExists(arguments.outputDir)) { + console.write("Could not find output 
directory."); + return false; + } + break; + default : + console.write("Run mode was not specified."); + return false; + } + return true; +} + +function directoryExists(directory) { + var fs = require("fs"); + try { + stats = fs.statSync(directory); + if (!stats.isDirectory()) { + return false; + } + } + catch (e) { + return false; + } + + return true; +} + +module.exports.parse = parse; +module.exports.validate = validate; \ No newline at end of file diff --git a/nodejs/src/rendler.js b/nodejs/src/rendler.js new file mode 100644 index 0000000..f86dd02 --- /dev/null +++ b/nodejs/src/rendler.js @@ -0,0 +1,29 @@ +var args = require("./arguments.js"); +var mesosApi = require('mesosApi')(0); + +function main() { + var arguments = args.parse(process.argv); + if (!arguments || !args.validate(args)) + return -1; + + switch (arguments.runMode) { + case "executor": + return runExecutor(arguments.executorName); + case "scheduler" : + return runScheduler(arguments.mesosMaster, arguments.startUrl, arguments.outputDir, arguments.runAsUser); + default: + return -1; + } +} + +function runScheduler(mesosMaster, startUrl, outputDir, runAsUser) { + //TODO: + return -1; +} + +function runExecutor(executorName) { + //TODO: + return -1; +} + +module.exports.main = main; \ No newline at end of file From 097e352221bff21dc7874d0e5c6bafac9988a8e7 Mon Sep 17 00:00:00 2001 From: bcrusu Date: Sat, 7 Nov 2015 13:15:29 +0200 Subject: [PATCH 07/14] partial implementation for rendlerScheduler.js --- nodejs/.gitignore | 1 + nodejs/index.js | 1 - nodejs/package.json | 3 - nodejs/src/arguments.js | 25 ++-- nodejs/src/rendler.js | 48 ++++++-- nodejs/src/rendlerScheduler.js | 208 +++++++++++++++++++++++++++++++++ nodejs/support/cluster.sh | 111 ++++++++++++++++++ 7 files changed, 370 insertions(+), 27 deletions(-) create mode 100644 nodejs/src/rendlerScheduler.js create mode 100755 nodejs/support/cluster.sh diff --git a/nodejs/.gitignore b/nodejs/.gitignore index 1c0eb6e..b3ff1bb 100644 --- 
a/nodejs/.gitignore +++ b/nodejs/.gitignore @@ -1,3 +1,4 @@ /.idea /node_modules +/support/work *.iml diff --git a/nodejs/index.js b/nodejs/index.js index 2dd84c1..919deab 100644 --- a/nodejs/index.js +++ b/nodejs/index.js @@ -1,3 +1,2 @@ var rendler = require("./src/rendler.js"); - rendler.main(); \ No newline at end of file diff --git a/nodejs/package.json b/nodejs/package.json index 3cdbd7a..441ab1a 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -2,9 +2,6 @@ "name": "rendler", "version": "0.0.1", "private": true, - "scripts": { - "start": "node index.js" - }, "dependencies": { "mesosApi": ">0.0.0" } diff --git a/nodejs/src/arguments.js b/nodejs/src/arguments.js index 9ba352e..06ed0b9 100644 --- a/nodejs/src/arguments.js +++ b/nodejs/src/arguments.js @@ -9,7 +9,7 @@ function parse(argsArray) { argsArray.forEach(function (arg) { if (arg.indexOf("-executor=") === 0) { if (runMode !== undefined) { - console.write("Invalid run mode detected. Check the 'executor' argument!"); + console.log("Invalid run mode detected. Check the 'executor' argument!"); return null; } @@ -18,7 +18,7 @@ function parse(argsArray) { } else if (arg.indexOf("-scheduler") === 0) { if (runMode !== undefined) { - console.write("Invalid run mode detected. Check the 'scheduler' argument!"); + console.log("Invalid run mode detected. 
Check the 'scheduler' argument!"); return null; } @@ -26,7 +26,7 @@ function parse(argsArray) { } else if (arg.indexOf("-master=") === 0) { if (mesosMaster !== undefined) { - console.write("Mesos master option can be specified only once."); + console.log("Mesos master option can be specified only once."); return null; } @@ -34,7 +34,7 @@ function parse(argsArray) { } else if (arg.indexOf("-output=") === 0) { if (outputDir !== undefined) { - console.write("Output directory option can be specified only once."); + console.log("Output directory option can be specified only once."); return null; } @@ -42,7 +42,7 @@ function parse(argsArray) { } else if (arg.indexOf("-starturl=") === 0) { if (startUrl !== undefined) { - console.write("Start URL option can be specified only once."); + console.log("Start URL option can be specified only once."); return null; } @@ -50,12 +50,15 @@ function parse(argsArray) { } else if (arg.indexOf("-user=") === 0) { if (runAsUser !== undefined) { - console.write("User option can be specified only once."); + console.log("User option can be specified only once."); return null; } runAsUser = arg.substring("-user=".length); } + else { + console.log("Unknown argument detected: " + arg); + } }); return { @@ -72,27 +75,27 @@ function validate(arguments) { switch (arguments.runMode) { case "executor": if (!arguments.executorName) { - console.write("Invalid executor name."); + console.log("Invalid executor name."); return false; } break; case "scheduler" : if (!arguments.mesosMaster) { - console.write("Invalid Mesos master address."); + console.log("Invalid Mesos master address."); return false; } if (!arguments.outputDir) { - console.write("Invalid output directory."); + console.log("Invalid output directory."); return false; } if (!directoryExists(arguments.outputDir)) { - console.write("Could not find output directory."); + console.log("Could not find output directory."); return false; } break; default : - console.write("Run mode was not 
specified."); + console.log("Run mode was not specified."); return false; } return true; diff --git a/nodejs/src/rendler.js b/nodejs/src/rendler.js index f86dd02..b5b47d1 100644 --- a/nodejs/src/rendler.js +++ b/nodejs/src/rendler.js @@ -1,24 +1,48 @@ -var args = require("./arguments.js"); -var mesosApi = require('mesosApi')(0); +var Arguments = require("./arguments.js"); +const MesosApi = require('mesosApi')(0); +const Protos = MesosApi.protos.mesos; +const RendlerScheduler = require('./rendlerScheduler.js'); function main() { - var arguments = args.parse(process.argv); - if (!arguments || !args.validate(args)) - return -1; + var args = Arguments.parse(process.argv.slice(2)); + if (!args || !Arguments.validate(args)) + return; - switch (arguments.runMode) { + switch (args.runMode) { case "executor": - return runExecutor(arguments.executorName); + runExecutor(args.executorName); + break; case "scheduler" : - return runScheduler(arguments.mesosMaster, arguments.startUrl, arguments.outputDir, arguments.runAsUser); - default: - return -1; + runScheduler(args.mesosMaster, args.startUrl, args.outputDir, args.runAsUser); + break; } } function runScheduler(mesosMaster, startUrl, outputDir, runAsUser) { - //TODO: - return -1; + var frameworkInfo = new Protos.FrameworkInfo({ + id: { + value: "Rendler" + }, + name: "Rendler (Node.js)", + failover_timeout: 5, //seconds + checkpoint: false, + user: runAsUser + }); + + if (!startUrl) + startUrl = "https://mesosphere.com"; + + var scheduler = new RendlerScheduler(startUrl, outputDir, runAsUser); + var driver = MesosApi.createSchedulerDriver(scheduler, frameworkInfo, mesosMaster); + + console.log("Running scheduler driver..."); + driver.run() + .then(function (status) { + console.log("Scheduler driver finished with status: " + status); + }) + .catch(function (error) { + console.log("Unexpected error: " + error); + }); } function runExecutor(executorName) { diff --git a/nodejs/src/rendlerScheduler.js 
b/nodejs/src/rendlerScheduler.js new file mode 100644 index 0000000..ff69fb1 --- /dev/null +++ b/nodejs/src/rendlerScheduler.js @@ -0,0 +1,208 @@ +const util = require('util'); +const MesosApi = require('mesosApi')(0); +const Protos = MesosApi.protos.mesos; +const EventEmitter = require('events'); + +const MaxTasksToRun = 256; // limit for demonstration purpose +const RenderCpus = 1; +const RenderMem = 128; +const CrawlCpus = 0.5; +const CrawlMem = 64; + +function RendlerScheduler(startUrl, outputDir, runAsUser) { + EventEmitter.call(this); + + var _renderQueue = [ startUrl ]; + var _crawlQueue = [ startUrl ]; + var _crawled = []; + var _launchedTasks = 0; + + function onRegistered(driver, frameworkId, masterInfo) { + console.log("Registered with Mesos master. FrameworkId=" + frameworkId.value); + } + + function onResourceOffers(driver, offers) { + for (var i = 0; i < offers.length; i++) + { + var offer = offers[i]; + var tasks = []; + var resourcesCounter = new ResourcesCounter(offer); + var done; + do + { + done = true; + + var renderUrl = _renderQueue.pop(); + if (renderUrl && resourcesCounter.hasRenderTaskResources()) + { + tasks.push(getRenderTaskInfo(offer, ++_launchedTasks, renderUrl)); + resourcesCounter.subtractRenderResources(); + done = false; + } + + var crawlUrl = _crawlQueue.pop(); + if (crawlUrl && resourcesCounter.hasCrawlTaskResources()) + { + tasks.push(getCrawlTaskInfo(offer, ++_launchedTasks, crawlUrl)); + resourcesCounter.subtractCrawlResources(); + _crawled.push(crawlUrl); + done = false; + } + } while (!done); + + if (tasks.length > 0) { + driver.launchTasks ([offer.id], tasks); + } + else + driver.declineOffer(offer.id); + } + } + + function onStatusUpdate(driver, status) { + //TODO: + } + + function onFrameworkMessage(driver, executorId, slaveId, data) { + //TODO: + } + + function onError(driver, message) { + console.log("Error: " + message); + } + + function getRenderTaskInfo(offer, uniqueId, url) + { + return new Protos.TaskInfo({ + name: 
"Rendler.Render_" + uniqueId, + task_id: new Protos.TaskID({ value: uniqueId.toString() }), + slave_id: offer.slave_id, + resources: [ + new Protos.Resource({ + name: "cpus", + type: Protos.Value.Type.SCALAR, + scalar: new Protos.Value.Scalar({ value: RenderCpus }) + }), + new Protos.Resource({ + name: "mem", + type: Protos.Value.Type.SCALAR, + scalar: new Protos.Value.Scalar({ value: RenderMem }) + }) + ], + executor: new Protos.ExecutorInfo({ + executor_id: new Protos.ExecutorID({ value: "RenderExecutor" }), + command: new Protos.CommandInfo({ + value: "mono rendler.exe -executor=render", //TODO + user: runAsUser, + uris: [ + new Protos.CommandInfo.URI({ + cache: false, + extract: true, + value: "./rendler.tar.gz", // relative to "frameworks_home" mesos-slave command argument + executable: false + }) + ] + }), + data: "" //TODO: Encoding.UTF8.GetBytes (_outputDir) + }), + data: "" //TODO: Encoding.UTF8.GetBytes (url) + }); + } + + function getCrawlTaskInfo(offer, uniqueId, url) + { + return new Protos.TaskInfo({ + name: "Rendler.Crawl_" + uniqueId, + task_id: new Protos.TaskID({ value: uniqueId.toString() }), + slave_id: offer.slave_id, + resources: [ + new Protos.Resource({ + name: "cpus", + type: Protos.Value.Type.SCALAR, + scalar: new Protos.Value.Scalar({ value: CrawlCpus }) + }), + new Protos.Resource({ + name: "mem", + type: Protos.Value.Type.SCALAR, + scalar: new Protos.Value.Scalar({ value: CrawlMem }) + }) + ], + executor: new Protos.ExecutorInfo({ + executor_id: new Protos.ExecutorID({ value: "CrawlExecutor" }), + command: new Protos.CommandInfo({ + value: "mono rendler.exe -executor=crawl", //TODO + user: runAsUser, + uris: [ + new Protos.CommandInfo.URI({ + cache: false, + extract: true, + value: "./rendler.tar.gz", // relative to "frameworks_home" mesos-slave command argument + executable: false + }) + ] + }), + }), + data: "" //TODO: Encoding.UTF8.GetBytes (url) + }); + } + + function ResourcesCounter(offer) + { + var _cpus = 0; + var _mem = 0; + + 
var cpusResource = getResource("cpus"); + if (cpusResource) + _cpus = cpusResource.scalar.value; + + var memResource = getResource("mem"); + if (memResource) + _mem = memResource.scalar.value; + + function getResource(name){ + return offer.resources.find(function (r){ + return r.name === name; + }); + } + + function subtract(cpus, mem) + { + _cpus = _cpus - cpus; + _mem = _mem - mem; + } + + function hasResources(cpus, mem) + { + return _cpus >= cpus && _mem >= mem; + } + + var result = {}; + + result.hasRenderTaskResources = function() { + return hasResources(RenderCpus, RenderMem); + }; + + result.hasCrawlTaskResources = function() { + return hasResources(CrawlCpus, CrawlMem); + }; + + result.subtractRenderResources = function() { + subtract(RenderCpus, RenderMem); + }; + + result.subtractCrawlResources = function () { + subtract(CrawlCpus, CrawlMem); + }; + + return result; + } + + this.on("registered", onRegistered); + this.on("resourceOffers", onResourceOffers); + this.on("statusUpdate", onStatusUpdate); + this.on("frameworkMessage", onFrameworkMessage); + this.on("error", onError); +} + +util.inherits(RendlerScheduler, EventEmitter); + +module.exports = RendlerScheduler; \ No newline at end of file diff --git a/nodejs/support/cluster.sh b/nodejs/support/cluster.sh new file mode 100755 index 0000000..cde60a3 --- /dev/null +++ b/nodejs/support/cluster.sh @@ -0,0 +1,111 @@ +#!/bin/bash + +SCRIPTDIR=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd) + +CLUSTER_WORK_DIR=${SCRIPTDIR}/work +RENDLER_OUTPUT_DIR=/tmp/rendlerout + +APPBUILDDIR=${SCRIPTDIR}/../src/main/mesosclr.Rendler/bin/Debug + +copy_files() { + if [ ! -d "$CLUSTER_WORK_DIR" ]; then + echo "Creating cluster work dir: $CLUSTER_WORK_DIR ..." + mkdir "$CLUSTER_WORK_DIR" + fi + + if [ ! -d "$RENDLER_OUTPUT_DIR" ]; then + echo "Creating rendler output dir: $RENDLER_OUTPUT_DIR ..." 
+ mkdir "$RENDLER_OUTPUT_DIR" + chmod 777 $RENDLER_OUTPUT_DIR + fi + + + #cd $APPBUILDDIR + #tar -czf ${CLUSTER_WORK_DIR}/rendler.tar.gz * + #cd - +} + +start_cluster() { + copy_files + + export LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH} + + mesos-master --cluster=mesosclr --ip=127.0.0.50 --port=5050 --allocation_interval=1secs --registry=in_memory --quorum=1 --quiet \ + --log_dir=${CLUSTER_WORK_DIR}/master/logs --work_dir=${CLUSTER_WORK_DIR}/master/data & + + sleep 0.5s + + mesos-slave --master=127.0.0.50:5050 --ip=127.0.0.51 --port=5051 --resources="cpus:2;mem:512" --attributes=name:slave1 --quiet \ + --frameworks_home=${CLUSTER_WORK_DIR} --log_dir=${CLUSTER_WORK_DIR}/slave1/logs --work_dir=${CLUSTER_WORK_DIR}/slave1/data & + + sleep 0.2s + + mesos-slave --master=127.0.0.50:5050 --ip=127.0.0.52 --port=5052 --resources="cpus:2;mem:512" --attributes=name:slave2 --quiet \ + --frameworks_home=${CLUSTER_WORK_DIR} --log_dir=${CLUSTER_WORK_DIR}/slave2/logs --work_dir=${CLUSTER_WORK_DIR}/slave2/data & + + sleep 0.2s + + mesos-slave --master=127.0.0.50:5050 --ip=127.0.0.53 --port=5053 --resources="cpus:2;mem:512" --attributes=name:slave3 --quiet \ + --frameworks_home=${CLUSTER_WORK_DIR} --log_dir=${CLUSTER_WORK_DIR}/slave3/logs --work_dir=${CLUSTER_WORK_DIR}/slave3/data & +} + +stop_cluster() { + killall -q mesos-slave + killall -q mesos-master +} + +clean() { + echo "Removing cluster work dir at: $CLUSTER_WORK_DIR ..." + rm -rf "$CLUSTER_WORK_DIR" + + echo "Removing temp Mesos dir at: /tmp/mesos ..." + rm -rf /tmp/mesos + + echo "Removing Rendler output dir at: $RENDLER_OUTPUT_DIR ..." + rm -rf $RENDLER_OUTPUT_DIR +} + +if [ "$(id -u)" != "0" ]; then + echo "Mesos requires to be executed as root." + exit 1 +fi + +if [ -z "$SCRIPTDIR" ]; then + echo "Could not detect current script dir..." + exit 1 +fi + +case "$1" in + start) + echo "Starting..." + start_cluster + echo "Done." + ;; + stop) + echo "Stopping..." + stop_cluster + echo "Done." 
+ ;; + clean) + echo "Cleaning cluster dir..." + clean + echo "Done." + ;; + restart) + echo "Stopping..." + stop_cluster + + if [ "$2" = "-c" ]; then + echo "Cleaning cluster dir..." + clean + fi + + echo "Starting..." + start_cluster + echo "Done." + ;; + *) + echo "Usage: cluster {start|stop|restart|clean}" + exit 1 + ;; +esac From 092a2cdfd92e3545990e06e00ddd0eca813ac956 Mon Sep 17 00:00:00 2001 From: bcrusu Date: Sat, 7 Nov 2015 15:08:59 +0200 Subject: [PATCH 08/14] added renderExecutor.js --- nodejs/index.js | 2 +- nodejs/package.json | 3 +- nodejs/src/executors/executorUtil.js | 31 +++++++++++ nodejs/src/executors/renderExecutor.js | 73 ++++++++++++++++++++++++++ nodejs/src/rendler.js | 4 +- nodejs/src/rendlerScheduler.js | 65 ++++++++++------------- 6 files changed, 138 insertions(+), 40 deletions(-) create mode 100644 nodejs/src/executors/executorUtil.js create mode 100644 nodejs/src/executors/renderExecutor.js diff --git a/nodejs/index.js b/nodejs/index.js index 919deab..738f423 100644 --- a/nodejs/index.js +++ b/nodejs/index.js @@ -1,2 +1,2 @@ -var rendler = require("./src/rendler.js"); +var rendler = require("./src/rendler"); rendler.main(); \ No newline at end of file diff --git a/nodejs/package.json b/nodejs/package.json index 441ab1a..c5e7dee 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -3,6 +3,7 @@ "version": "0.0.1", "private": true, "dependencies": { - "mesosApi": ">0.0.0" + "mesosApi": ">0.0.0", + "bytebuffer": "~4 >=4.1" } } \ No newline at end of file diff --git a/nodejs/src/executors/executorUtil.js b/nodejs/src/executors/executorUtil.js new file mode 100644 index 0000000..f4b197e --- /dev/null +++ b/nodejs/src/executors/executorUtil.js @@ -0,0 +1,31 @@ +const MesosApi = require('mesosApi')(0); +const Protos = MesosApi.protos.mesos; + +exports.sendTaskRunningStatus = function (driver, taskId) { + driver.sendStatusUpdate(new Protos.TaskStatus({ + task_id: taskId, + state: Protos.TaskState.TASK_RUNNING + })); +}; + 
+exports.sendTaskFinishedStatus = function (driver, taskId) { + driver.sendStatusUpdate(new Protos.TaskStatus({ + task_id: taskId, + state: Protos.TaskState.TASK_FINISHED + })); +}; + +exports.sendTaskErrorStatus = function (driver, taskId) { + driver.sendStatusUpdate(new Protos.TaskStatus({ + task_id: taskId, + state: Protos.TaskState.TASK_ERROR + })); +}; + +exports.isTerminalTaskState = function (taskState) { + return taskState === Protos.TaskState.TASK_FINISHED || + taskState === Protos.TaskState.TASK_FAILED || + taskState === Protos.TaskState.TASK_KILLED || + taskState === Protos.TaskState.TASK_LOST || + taskState === Protos.TaskState.TASK_ERROR; +}; \ No newline at end of file diff --git a/nodejs/src/executors/renderExecutor.js b/nodejs/src/executors/renderExecutor.js new file mode 100644 index 0000000..f057cea --- /dev/null +++ b/nodejs/src/executors/renderExecutor.js @@ -0,0 +1,73 @@ +const util = require('util'); +const path = require('path'); +const child_process = require('child_process'); +const MesosApi = require('mesosApi')(0); +const Protos = MesosApi.protos.mesos; +const EventEmitter = require('events'); +const executorUtil = require('./executorUtil'); + +function RendlerScheduler(startUrl, outputDir, runAsUser) { + EventEmitter.call(this); + + var _outputDir; + + function onRegistered(driver, executorInfo, frameworkInfo, slaveInfo) { + _outputDir = executorInfo.data.toUTF8(); + console.log("Registered executor on host " + slaveInfo.hostname + ". 
Output dir is '" + _outputDir + "'."; + } + + function onLaunchTask(driver, taskInfo) { + console.log("Launching render task '" + taskInfo.task_id.value + "'..."); + + try { + executorUtil.sendTaskRunningStatus(driver, taskInfo.task_id); + + var url = taskInfo.data.toUTF8(); + var imageFileName = runRendering(taskInfo.task_id, url); + + sendRenderResultMessage(driver, url, imageFileName); + executorUtil.sendTaskFinishedStatus(driver, taskInfo.task_id); + + } + catch (exception) { + console.log("Exception during render operation: " + exception); + executorUtil.sendTaskErrorStatus(driver, taskInfo.task_id); + } + } + + function onError(driver, message) { + console.log("Error: " + message); + } + + function runRendering(taskId, url) + { + var imagePath = path.join(_outputDir, taskId.value + ".png"); + var options = { + timeout: 1000 * 60 + }; + + child_process.spawnSync("phantomjs", "render.js \"{url}\" \"{imagePath}\"", options); + + return imagePath; + } + + function sendRenderResultMessage(driver, url, fileName) { + var message = new { + type: "RenderResult", + body: { + url: url, + fileName: fileName + } + }; + + driver.sendFrameworkMessage(JSON.stringify(message)); + } + + this.on("registered", onRegistered); + this.on("launchTask", onLaunchTask); + this.on("error", onError); +} + +util.inherits(RendlerScheduler, EventEmitter); + +module.exports = RendlerScheduler; \ No newline at end of file diff --git a/nodejs/src/rendler.js b/nodejs/src/rendler.js index b5b47d1..fdbf76b 100644 --- a/nodejs/src/rendler.js +++ b/nodejs/src/rendler.js @@ -1,7 +1,7 @@ -var Arguments = require("./arguments.js"); +var Arguments = require("./arguments"); const MesosApi = require('mesosApi')(0); const Protos = MesosApi.protos.mesos; -const RendlerScheduler = require('./rendlerScheduler.js'); +const RendlerScheduler = require('./rendlerScheduler'); function main() { var args = Arguments.parse(process.argv.slice(2)); diff --git a/nodejs/src/rendlerScheduler.js 
b/nodejs/src/rendlerScheduler.js index ff69fb1..4edfa62 100644 --- a/nodejs/src/rendlerScheduler.js +++ b/nodejs/src/rendlerScheduler.js @@ -2,6 +2,7 @@ const util = require('util'); const MesosApi = require('mesosApi')(0); const Protos = MesosApi.protos.mesos; const EventEmitter = require('events'); +const ByteBuffer = require('bytebuffer'); const MaxTasksToRun = 256; // limit for demonstration purpose const RenderCpus = 1; @@ -12,8 +13,8 @@ const CrawlMem = 64; function RendlerScheduler(startUrl, outputDir, runAsUser) { EventEmitter.call(this); - var _renderQueue = [ startUrl ]; - var _crawlQueue = [ startUrl ]; + var _renderQueue = [startUrl]; + var _crawlQueue = [startUrl]; var _crawled = []; var _launchedTasks = 0; @@ -22,8 +23,7 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { } function onResourceOffers(driver, offers) { - for (var i = 0; i < offers.length; i++) - { + for (var i = 0; i < offers.length; i++) { var offer = offers[i]; var tasks = []; var resourcesCounter = new ResourcesCounter(offer); @@ -33,16 +33,14 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { done = true; var renderUrl = _renderQueue.pop(); - if (renderUrl && resourcesCounter.hasRenderTaskResources()) - { + if (renderUrl && resourcesCounter.hasRenderTaskResources()) { tasks.push(getRenderTaskInfo(offer, ++_launchedTasks, renderUrl)); resourcesCounter.subtractRenderResources(); done = false; } var crawlUrl = _crawlQueue.pop(); - if (crawlUrl && resourcesCounter.hasCrawlTaskResources()) - { + if (crawlUrl && resourcesCounter.hasCrawlTaskResources()) { tasks.push(getCrawlTaskInfo(offer, ++_launchedTasks, crawlUrl)); resourcesCounter.subtractCrawlResources(); _crawled.push(crawlUrl); @@ -51,7 +49,7 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { } while (!done); if (tasks.length > 0) { - driver.launchTasks ([offer.id], tasks); + driver.launchTasks([offer.id], tasks); } else driver.declineOffer(offer.id); @@ -70,26 +68,25 @@ function 
RendlerScheduler(startUrl, outputDir, runAsUser) { console.log("Error: " + message); } - function getRenderTaskInfo(offer, uniqueId, url) - { + function getRenderTaskInfo(offer, uniqueId, url) { return new Protos.TaskInfo({ name: "Rendler.Render_" + uniqueId, - task_id: new Protos.TaskID({ value: uniqueId.toString() }), + task_id: new Protos.TaskID({value: uniqueId.toString()}), slave_id: offer.slave_id, resources: [ new Protos.Resource({ name: "cpus", type: Protos.Value.Type.SCALAR, - scalar: new Protos.Value.Scalar({ value: RenderCpus }) - }), + scalar: new Protos.Value.Scalar({value: RenderCpus}) + }), new Protos.Resource({ name: "mem", type: Protos.Value.Type.SCALAR, - scalar: new Protos.Value.Scalar({ value: RenderMem }) + scalar: new Protos.Value.Scalar({value: RenderMem}) }) ], executor: new Protos.ExecutorInfo({ - executor_id: new Protos.ExecutorID({ value: "RenderExecutor" }), + executor_id: new Protos.ExecutorID({value: "RenderExecutor"}), command: new Protos.CommandInfo({ value: "mono rendler.exe -executor=render", //TODO user: runAsUser, @@ -102,32 +99,31 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { }) ] }), - data: "" //TODO: Encoding.UTF8.GetBytes (_outputDir) + data: ByteBuffer.fromUTF8(_outputDir) }), - data: "" //TODO: Encoding.UTF8.GetBytes (url) + data: ByteBuffer.fromUTF8(url) }); } - function getCrawlTaskInfo(offer, uniqueId, url) - { + function getCrawlTaskInfo(offer, uniqueId, url) { return new Protos.TaskInfo({ name: "Rendler.Crawl_" + uniqueId, - task_id: new Protos.TaskID({ value: uniqueId.toString() }), + task_id: new Protos.TaskID({value: uniqueId.toString()}), slave_id: offer.slave_id, resources: [ new Protos.Resource({ name: "cpus", type: Protos.Value.Type.SCALAR, - scalar: new Protos.Value.Scalar({ value: CrawlCpus }) + scalar: new Protos.Value.Scalar({value: CrawlCpus}) }), new Protos.Resource({ name: "mem", type: Protos.Value.Type.SCALAR, - scalar: new Protos.Value.Scalar({ value: CrawlMem }) + scalar: new 
Protos.Value.Scalar({value: CrawlMem}) }) ], executor: new Protos.ExecutorInfo({ - executor_id: new Protos.ExecutorID({ value: "CrawlExecutor" }), + executor_id: new Protos.ExecutorID({value: "CrawlExecutor"}), command: new Protos.CommandInfo({ value: "mono rendler.exe -executor=crawl", //TODO user: runAsUser, @@ -141,12 +137,11 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { ] }), }), - data: "" //TODO: Encoding.UTF8.GetBytes (url) + data: ByteBuffer.fromUTF8(url) }); } - function ResourcesCounter(offer) - { + function ResourcesCounter(offer) { var _cpus = 0; var _mem = 0; @@ -158,34 +153,32 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { if (memResource) _mem = memResource.scalar.value; - function getResource(name){ - return offer.resources.find(function (r){ + function getResource(name) { + return offer.resources.find(function (r) { return r.name === name; }); } - function subtract(cpus, mem) - { + function subtract(cpus, mem) { _cpus = _cpus - cpus; _mem = _mem - mem; } - function hasResources(cpus, mem) - { + function hasResources(cpus, mem) { return _cpus >= cpus && _mem >= mem; } var result = {}; - result.hasRenderTaskResources = function() { + result.hasRenderTaskResources = function () { return hasResources(RenderCpus, RenderMem); }; - result.hasCrawlTaskResources = function() { + result.hasCrawlTaskResources = function () { return hasResources(CrawlCpus, CrawlMem); }; - result.subtractRenderResources = function() { + result.subtractRenderResources = function () { subtract(RenderCpus, RenderMem); }; From 38a01f2e1ea97d18293c547b5e1c595fd4710da5 Mon Sep 17 00:00:00 2001 From: bcrusu Date: Sun, 8 Nov 2015 00:21:20 +0200 Subject: [PATCH 09/14] added crawlExecutor.js + async phantomjs execution --- nodejs/render.js | 23 +++++++ nodejs/src/executors/crawlExecutor.js | 92 ++++++++++++++++++++++++++ nodejs/src/executors/renderExecutor.js | 50 ++++++-------- nodejs/src/rendlerScheduler.js | 2 +- 4 files changed, 135 insertions(+), 32 
deletions(-) create mode 100644 nodejs/render.js create mode 100644 nodejs/src/executors/crawlExecutor.js diff --git a/nodejs/render.js b/nodejs/render.js new file mode 100644 index 0000000..6d9fe57 --- /dev/null +++ b/nodejs/render.js @@ -0,0 +1,23 @@ +var page = require('webpage').create(), + system = require('system'), + address, output, size; + +if (system.args.length < 3) { + console.log('Usage: render.js '); + phantom.exit(1); +} + +address = system.args[1]; +destination = system.args[2]; + +console.log('Rendering ' + address + ' to ' + destination); + +page.viewportSize = { + width: 1024, + height: 768 +}; + +page.open(address, function() { + page.render(destination); + phantom.exit(); +}); diff --git a/nodejs/src/executors/crawlExecutor.js b/nodejs/src/executors/crawlExecutor.js new file mode 100644 index 0000000..98c60b8 --- /dev/null +++ b/nodejs/src/executors/crawlExecutor.js @@ -0,0 +1,92 @@ +const util = require('util'); +const http = require('http'); +const https = require('https'); +const urlLib = require('url'); +const EventEmitter = require('events'); +const executorUtil = require('./executorUtil'); + +function CrawlExecutor() { + EventEmitter.call(this); + + function onRegistered(driver, executorInfo, frameworkInfo, slaveInfo) { + console.log("Registered executor on host " + slaveInfo.hostname); + } + + function onLaunchTask(driver, taskInfo) { + console.log("Launching crawl task '" + taskInfo.task_id.value + "'..."); + executorUtil.sendTaskRunningStatus(driver, taskInfo.task_id); + + var url = taskInfo.data.toUTF8(); + + var httpx = getHttpObject(url); + if (!httpx) { + console.log("Unrecognized url protocol: " + url); + executorUtil.sendTaskFinishedStatus(driver, taskInfo.task_id); + return; + } + + httpx.get(url, function (response) { + var links = []; + response.setEncoding('utf8'); + + response.on('data', function (chunk) { + links = links.concat(parseLinks(chunk)); + }); + response.on('end', function () { + sendCrawlResultMessage(driver, 
url, links); + executorUtil.sendTaskFinishedStatus(driver, taskInfo.task_id); + }); + }).on('error', function (error) { + console.log("Error during crawl operation: " + error); + executorUtil.sendTaskErrorStatus(driver, taskInfo.task_id); + }); + } + + function onError(driver, message) { + console.log("Error: " + message); + } + + function sendCrawlResultMessage(driver, url, links) { + var message = new { + type: "CrawlResult", + body: { + url: url, + links: links + } + }; + + driver.sendFrameworkMessage(JSON.stringify(message)); + } + + function getHttpObject(url) { + var parsed = urlLib.parse(url); + switch (parsed.protocol) { + case "https:": + return https; + case "http:": + return http; + default : + return null; + } + } + + function parseLinks(content) { + var regex = /]+href=[\"']?([^\"\'>]+)[\"\']?[^>]*>.+?<\/a>/gi; + var links = []; + var array; + while ((array = regex.exec(content)) !== null) { + var link = array[1]; + links.push(link); + } + + return links; + } + + this.on("registered", onRegistered); + this.on("launchTask", onLaunchTask); + this.on("error", onError); +} + +util.inherits(CrawlExecutor, EventEmitter); + +module.exports = CrawlExecutor; \ No newline at end of file diff --git a/nodejs/src/executors/renderExecutor.js b/nodejs/src/executors/renderExecutor.js index f057cea..d1e8070 100644 --- a/nodejs/src/executors/renderExecutor.js +++ b/nodejs/src/executors/renderExecutor.js @@ -1,56 +1,44 @@ const util = require('util'); const path = require('path'); const child_process = require('child_process'); -const MesosApi = require('mesosApi')(0); -const Protos = MesosApi.protos.mesos; const EventEmitter = require('events'); const executorUtil = require('./executorUtil'); -function RendlerScheduler(startUrl, outputDir, runAsUser) { +function RenderExecutor() { EventEmitter.call(this); var _outputDir; function onRegistered(driver, executorInfo, frameworkInfo, slaveInfo) { _outputDir = executorInfo.data.toUTF8(); - console.log("Registered executor on 
host " + slaveInfo.hostname + ". Output dir is '" + _outputDir + "'."; + console.log("Registered executor on host " + slaveInfo.hostname + ". Output dir is '" + _outputDir + "'."); } function onLaunchTask(driver, taskInfo) { console.log("Launching render task '" + taskInfo.task_id.value + "'..."); + executorUtil.sendTaskRunningStatus(driver, taskInfo.task_id); - try { - executorUtil.sendTaskRunningStatus(driver, taskInfo.task_id); - - var url = taskInfo.data.toUTF8(); - var imageFileName = runRendering(taskInfo.task_id, url); - - sendRenderResultMessage(driver, url, imageFileName); - executorUtil.sendTaskFinishedStatus(driver, taskInfo.task_id); + var url = taskInfo.data.toUTF8(); + var fileName = path.join(_outputDir, taskInfo.task_id.value + ".png"); + var spawnOptions = { + timeout: 1000 * 30 + }; - } - catch (exception) { - console.log("Exception during render operation: " + exception); - executorUtil.sendTaskErrorStatus(driver, taskInfo.task_id); - } + child_process.spawn("phantomjs", ["render.js", url, fileName], spawnOptions) + .on('close', function () { + sendRenderResultMessage(driver, url, fileName); + executorUtil.sendTaskFinishedStatus(driver, taskInfo.task_id); + }) + .on('error', function (err) { + console.log("Error during render operation: " + err); + executorUtil.sendTaskErrorStatus(driver, taskInfo.task_id); + }); } function onError(driver, message) { console.log("Error: " + message); } - function runRendering(taskId, url) - { - var imagePath = path.join(_outputDir, taskId.value + ".png"); - var options = { - timeout: 1000 * 60 - }; - - child_process.spawnSync("phantomjs", "render.js \"{url}\" \"{imagePath}\"", options); - - return imagePath; - } - function sendRenderResultMessage(driver, url, fileName) { var message = new { type: "RenderResult", @@ -68,6 +56,6 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { this.on("error", onError); } -util.inherits(RendlerScheduler, EventEmitter); +util.inherits(RenderExecutor, EventEmitter); 
-module.exports = RendlerScheduler; \ No newline at end of file +module.exports = RenderExecutor; \ No newline at end of file diff --git a/nodejs/src/rendlerScheduler.js b/nodejs/src/rendlerScheduler.js index 4edfa62..98185d2 100644 --- a/nodejs/src/rendlerScheduler.js +++ b/nodejs/src/rendlerScheduler.js @@ -99,7 +99,7 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { }) ] }), - data: ByteBuffer.fromUTF8(_outputDir) + data: ByteBuffer.fromUTF8(outputDir) }), data: ByteBuffer.fromUTF8(url) }); From 7a7e5a6d0c4337580a214a96257273fd3e1c263b Mon Sep 17 00:00:00 2001 From: bcrusu Date: Sun, 8 Nov 2015 13:50:32 +0200 Subject: [PATCH 10/14] finished rendlerScheduler implementation --- nodejs/src/arguments.js | 2 +- nodejs/src/executors/crawlExecutor.js | 4 +- nodejs/src/executors/executorUtil.js | 8 --- nodejs/src/rendlerScheduler.js | 72 +++++++++++++++++++++++++-- 4 files changed, 72 insertions(+), 14 deletions(-) diff --git a/nodejs/src/arguments.js b/nodejs/src/arguments.js index 06ed0b9..e0eabd8 100644 --- a/nodejs/src/arguments.js +++ b/nodejs/src/arguments.js @@ -104,7 +104,7 @@ function validate(arguments) { function directoryExists(directory) { var fs = require("fs"); try { - stats = fs.statSync(directory); + var stats = fs.statSync(directory); if (!stats.isDirectory()) { return false; } diff --git a/nodejs/src/executors/crawlExecutor.js b/nodejs/src/executors/crawlExecutor.js index 98c60b8..2985ef1 100644 --- a/nodejs/src/executors/crawlExecutor.js +++ b/nodejs/src/executors/crawlExecutor.js @@ -20,7 +20,7 @@ function CrawlExecutor() { var httpx = getHttpObject(url); if (!httpx) { - console.log("Unrecognized url protocol: " + url); + console.log("Unrecognized protocol for url: " + url); executorUtil.sendTaskFinishedStatus(driver, taskInfo.task_id); return; } @@ -76,7 +76,7 @@ function CrawlExecutor() { var array; while ((array = regex.exec(content)) !== null) { var link = array[1]; - links.push(link); + links.push(link.toLowerCase()); } return 
links; diff --git a/nodejs/src/executors/executorUtil.js b/nodejs/src/executors/executorUtil.js index f4b197e..396e48a 100644 --- a/nodejs/src/executors/executorUtil.js +++ b/nodejs/src/executors/executorUtil.js @@ -20,12 +20,4 @@ exports.sendTaskErrorStatus = function (driver, taskId) { task_id: taskId, state: Protos.TaskState.TASK_ERROR })); -}; - -exports.isTerminalTaskState = function (taskState) { - return taskState === Protos.TaskState.TASK_FINISHED || - taskState === Protos.TaskState.TASK_FAILED || - taskState === Protos.TaskState.TASK_KILLED || - taskState === Protos.TaskState.TASK_LOST || - taskState === Protos.TaskState.TASK_ERROR; }; \ No newline at end of file diff --git a/nodejs/src/rendlerScheduler.js b/nodejs/src/rendlerScheduler.js index 98185d2..15a3ec7 100644 --- a/nodejs/src/rendlerScheduler.js +++ b/nodejs/src/rendlerScheduler.js @@ -1,4 +1,5 @@ const util = require('util'); +const path = require('path'); const MesosApi = require('mesosApi')(0); const Protos = MesosApi.protos.mesos; const EventEmitter = require('events'); @@ -17,6 +18,10 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { var _crawlQueue = [startUrl]; var _crawled = []; var _launchedTasks = 0; + var _finishedTasksCount = 0; + + var _renderResults = {}; + var _crawlResults = {}; function onRegistered(driver, frameworkId, masterInfo) { console.log("Registered with Mesos master. FrameworkId=" + frameworkId.value); @@ -57,11 +62,64 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { } function onStatusUpdate(driver, status) { - //TODO: + if (!isTerminalTaskState(status.state)) { + console.log("Status update: task " + status.task_id.value + " is in state " + status.state); + return; + } + + console.log("Status update: task " + status.task_id.value + " has terminated with state " + status.state); + + if (++_finishedTasksCount == MaxTasksToRun) { + console.log("Reached the max number of tasks to run. 
Stopping..."); + + var dotWritePath = path.join(outputDir, "result.dot"); + //TODO: write DOT file + driver.stop(); + } } function onFrameworkMessage(driver, executorId, slaveId, data) { - //TODO: + var url = undefined; + var message = JSON.parse(data); + + switch (message.type) { + case "CrawlResult": + url = message.body.url; + var links = message.body.links; + console.log("Framework message 'CrawlResult': got " + links.length + " links from url " + url); + + links + .filter(function (link) { + return !_crawled.some(function (crawledLink) { + return crawledLink === link; + }); + }) + .forEach(function (link) { + _crawlQueue.push(link); + _renderQueue.push(link); + }); + + // update edges: url -> links + var edges = _crawlResults[url] || []; + _crawlResults[url] = edges.concat(links); + + // empty edge list for links + links.forEach(function (link) { + if (!(link in _crawlResults)) + _crawlResults[link] = []; + }); + break; + case "RenderResult": + url = message.body.url; + var fileName = message.body.fileName; + console.log("Framework message 'RenderResult': saved " + fileName + " for url " + url); + + _renderResults[url] = fileName; + break; + default: + console.log("Unrecognized message type: " + message.type); + break; + } } function onError(driver, message) { @@ -135,7 +193,7 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { executable: false }) ] - }), + }) }), data: ByteBuffer.fromUTF8(url) }); @@ -189,6 +247,14 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { return result; } + function isTerminalTaskState(taskState) { + return taskState === Protos.TaskState.TASK_FINISHED || + taskState === Protos.TaskState.TASK_FAILED || + taskState === Protos.TaskState.TASK_KILLED || + taskState === Protos.TaskState.TASK_LOST || + taskState === Protos.TaskState.TASK_ERROR; + } + this.on("registered", onRegistered); this.on("resourceOffers", onResourceOffers); this.on("statusUpdate", onStatusUpdate); From 45159cf5b87a3200ca2bd50e3562bbdfd83ec22b 
Mon Sep 17 00:00:00 2001 From: bcrusu Date: Sun, 8 Nov 2015 15:00:33 +0200 Subject: [PATCH 11/14] work in progress... --- nodejs/package.json | 2 +- nodejs/src/arguments.js | 12 +++---- nodejs/src/executors/crawlExecutor.js | 2 +- nodejs/src/executors/executorUtil.js | 2 +- nodejs/src/executors/renderExecutor.js | 2 +- nodejs/src/rendler.js | 43 +++++++++++++++++++++----- nodejs/src/rendlerScheduler.js | 10 +++--- nodejs/support/cluster.sh | 9 +++--- 8 files changed, 54 insertions(+), 28 deletions(-) diff --git a/nodejs/package.json b/nodejs/package.json index c5e7dee..3e6f7ee 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -3,7 +3,7 @@ "version": "0.0.1", "private": true, "dependencies": { - "mesosApi": ">0.0.0", + "mesos-api": ">0.0.0", "bytebuffer": "~4 >=4.1" } } \ No newline at end of file diff --git a/nodejs/src/arguments.js b/nodejs/src/arguments.js index e0eabd8..ce6bf3f 100644 --- a/nodejs/src/arguments.js +++ b/nodejs/src/arguments.js @@ -62,12 +62,12 @@ function parse(argsArray) { }); return { - 'runMode': runMode, - 'mesosMaster': mesosMaster, - 'executorName': executor, - 'outputDir': outputDir, - 'startUrl': startUrl, - 'runAsUser': runAsUser + runMode: runMode, + mesosMaster: mesosMaster, + executorName: executor, + outputDir: outputDir, + startUrl: startUrl, + runAsUser: runAsUser } } diff --git a/nodejs/src/executors/crawlExecutor.js b/nodejs/src/executors/crawlExecutor.js index 2985ef1..b220ab9 100644 --- a/nodejs/src/executors/crawlExecutor.js +++ b/nodejs/src/executors/crawlExecutor.js @@ -47,7 +47,7 @@ function CrawlExecutor() { } function sendCrawlResultMessage(driver, url, links) { - var message = new { + var message = { type: "CrawlResult", body: { url: url, diff --git a/nodejs/src/executors/executorUtil.js b/nodejs/src/executors/executorUtil.js index 396e48a..f9d6e05 100644 --- a/nodejs/src/executors/executorUtil.js +++ b/nodejs/src/executors/executorUtil.js @@ -1,4 +1,4 @@ -const MesosApi = require('mesosApi')(0); +const 
MesosApi = require('mesos-api')(0); const Protos = MesosApi.protos.mesos; exports.sendTaskRunningStatus = function (driver, taskId) { diff --git a/nodejs/src/executors/renderExecutor.js b/nodejs/src/executors/renderExecutor.js index d1e8070..6a7c893 100644 --- a/nodejs/src/executors/renderExecutor.js +++ b/nodejs/src/executors/renderExecutor.js @@ -40,7 +40,7 @@ function RenderExecutor() { } function sendRenderResultMessage(driver, url, fileName) { - var message = new { + var message = { type: "RenderResult", body: { url: url, diff --git a/nodejs/src/rendler.js b/nodejs/src/rendler.js index fdbf76b..bd03f1f 100644 --- a/nodejs/src/rendler.js +++ b/nodejs/src/rendler.js @@ -1,12 +1,14 @@ var Arguments = require("./arguments"); -const MesosApi = require('mesosApi')(0); +const MesosApi = require("mesos-api")(0); const Protos = MesosApi.protos.mesos; -const RendlerScheduler = require('./rendlerScheduler'); +const RendlerScheduler = require("./rendlerScheduler"); +const CrawlExecutor = require("./executors/crawlExecutor"); +const RenderExecutor = require("./executors/renderExecutor"); function main() { var args = Arguments.parse(process.argv.slice(2)); if (!args || !Arguments.validate(args)) - return; + process.exit(-1); switch (args.runMode) { case "executor": @@ -38,16 +40,43 @@ function runScheduler(mesosMaster, startUrl, outputDir, runAsUser) { console.log("Running scheduler driver..."); driver.run() .then(function (status) { - console.log("Scheduler driver finished with status: " + status); + console.log("Scheduler driver finished with status: " + status); + process.exit(0); }) .catch(function (error) { - console.log("Unexpected error: " + error); + console.log("Unexpected driver error: " + error); + process.exit(-2); }); } function runExecutor(executorName) { - //TODO: - return -1; + var executor; + switch (executorName) { + case "render": + executor = new RenderExecutor(); + break; + case "crawl": + executor = new CrawlExecutor(); + break; + default: + { + 
console.log("Unrecognized executor: " + executorName); + process.exit(-1); + } + } + + var driver = MesosApi.createExecutorDriver(executor); + + console.log("Running executor driver..."); + driver.run() + .then(function (status) { + console.log("Executor driver finished with status: " + status); + process.exit(0); + }) + .catch(function (error) { + console.log("Unexpected driver error: " + error); + process.exit(-3); + }); } module.exports.main = main; \ No newline at end of file diff --git a/nodejs/src/rendlerScheduler.js b/nodejs/src/rendlerScheduler.js index 15a3ec7..bc98ffb 100644 --- a/nodejs/src/rendlerScheduler.js +++ b/nodejs/src/rendlerScheduler.js @@ -1,6 +1,6 @@ const util = require('util'); const path = require('path'); -const MesosApi = require('mesosApi')(0); +const MesosApi = require('mesos-api')(0); const Protos = MesosApi.protos.mesos; const EventEmitter = require('events'); const ByteBuffer = require('bytebuffer'); @@ -79,12 +79,11 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { } function onFrameworkMessage(driver, executorId, slaveId, data) { - var url = undefined; var message = JSON.parse(data); + var url = message.body.url; switch (message.type) { case "CrawlResult": - url = message.body.url; var links = message.body.links; console.log("Framework message 'CrawlResult': got " + links.length + " links from url " + url); @@ -110,7 +109,6 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { }); break; case "RenderResult": - url = message.body.url; var fileName = message.body.fileName; console.log("Framework message 'RenderResult': saved " + fileName + " for url " + url); @@ -146,7 +144,7 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { executor: new Protos.ExecutorInfo({ executor_id: new Protos.ExecutorID({value: "RenderExecutor"}), command: new Protos.CommandInfo({ - value: "mono rendler.exe -executor=render", //TODO + value: "node index.js -executor=render", user: runAsUser, uris: [ new 
Protos.CommandInfo.URI({ @@ -183,7 +181,7 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { executor: new Protos.ExecutorInfo({ executor_id: new Protos.ExecutorID({value: "CrawlExecutor"}), command: new Protos.CommandInfo({ - value: "mono rendler.exe -executor=crawl", //TODO + value: "node index.js -executor=crawl", user: runAsUser, uris: [ new Protos.CommandInfo.URI({ diff --git a/nodejs/support/cluster.sh b/nodejs/support/cluster.sh index cde60a3..ae70b72 100755 --- a/nodejs/support/cluster.sh +++ b/nodejs/support/cluster.sh @@ -4,8 +4,7 @@ SCRIPTDIR=$(cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd) CLUSTER_WORK_DIR=${SCRIPTDIR}/work RENDLER_OUTPUT_DIR=/tmp/rendlerout - -APPBUILDDIR=${SCRIPTDIR}/../src/main/mesosclr.Rendler/bin/Debug +APPDIR=${SCRIPTDIR}/../ copy_files() { if [ ! -d "$CLUSTER_WORK_DIR" ]; then @@ -20,9 +19,9 @@ copy_files() { fi - #cd $APPBUILDDIR - #tar -czf ${CLUSTER_WORK_DIR}/rendler.tar.gz * - #cd - + cd $APPDIR + tar -czf ${CLUSTER_WORK_DIR}/rendler.tar.gz * --exclude='support*' + cd - } start_cluster() { From b1fd8bf7548e50b8556465538a64a9e21a4172e8 Mon Sep 17 00:00:00 2001 From: bcrusu Date: Tue, 10 Nov 2015 23:41:29 +0200 Subject: [PATCH 12/14] added graph rendering to dot format --- nodejs/src/dotUtil.js | 61 +++++++++++++++++++++++++++ nodejs/src/executors/crawlExecutor.js | 13 +++++- nodejs/src/executors/executorUtil.js | 2 +- nodejs/src/rendler.js | 2 +- nodejs/src/rendlerScheduler.js | 9 ++-- 5 files changed, 80 insertions(+), 7 deletions(-) create mode 100644 nodejs/src/dotUtil.js diff --git a/nodejs/src/dotUtil.js b/nodejs/src/dotUtil.js new file mode 100644 index 0000000..c764d45 --- /dev/null +++ b/nodejs/src/dotUtil.js @@ -0,0 +1,61 @@ +const fs = require('fs'); +const endOfLine = require('os').EOL; + +function write(outputPath, nodeToChildNodes, nodeImageFileName) { + var url, nodeName; + var nodeNames = {}; + var nodeIdCounter = 0; + + var stream = fs.createWriteStream(outputPath, { + flags: 'w', + defaultEncoding: 
'utf8' + }); + + stream.write("digraph G {"); + stream.write(endOfLine); + stream.write("\tnode [shape=box];"); + stream.write(endOfLine); + + for (url in nodeToChildNodes) { + nodeName = "url_" + (++nodeIdCounter); + nodeNames[url] = nodeName; + + stream.write("\t"); + stream.write(nodeName); + + var imageFileName = nodeImageFileName[url]; + if (imageFileName) { + stream.write(" [label=\"\" image=\""); + stream.write(imageFileName); + } + else { + stream.write(" [label=\""); + stream.write(url); + } + + stream.write("\"];"); + stream.write(endOfLine); + } + + stream.write(endOfLine); + + for (url in nodeToChildNodes) { + nodeName = nodeNames[url]; + var childNodes = nodeToChildNodes[url]; + for (var i = 0; i < childNodes.length; i++) { + var childNode = childNodes[i]; + var childNodeName = nodeNames[childNode]; + stream.write("\t"); + stream.write(nodeName); + stream.write(" -> "); + stream.write(childNodeName); + stream.write(";"); + stream.write(endOfLine); + } + } + + stream.write("}"); + stream.end(endOfLine); +} + +module.exports.write = write; \ No newline at end of file diff --git a/nodejs/src/executors/crawlExecutor.js b/nodejs/src/executors/crawlExecutor.js index b220ab9..e8049d7 100644 --- a/nodejs/src/executors/crawlExecutor.js +++ b/nodejs/src/executors/crawlExecutor.js @@ -70,13 +70,24 @@ function CrawlExecutor() { } } + function getIsValidLink(link) { + try { + var protocol = urlLib.parse(link).protocol; + return protocol === "https:" || protocol === "http:"; + } + catch (e) { + return false; + } + } + function parseLinks(content) { var regex = /]+href=[\"']?([^\"\'>]+)[\"\']?[^>]*>.+?<\/a>/gi; var links = []; var array; while ((array = regex.exec(content)) !== null) { var link = array[1]; - links.push(link.toLowerCase()); + if (getIsValidLink(link)) + links.push(link.toLowerCase()); } return links; diff --git a/nodejs/src/executors/executorUtil.js b/nodejs/src/executors/executorUtil.js index f9d6e05..b1bf0b5 100644 --- 
a/nodejs/src/executors/executorUtil.js +++ b/nodejs/src/executors/executorUtil.js @@ -1,4 +1,4 @@ -const MesosApi = require('mesos-api')(0); +const MesosApi = require('mesos-api'); const Protos = MesosApi.protos.mesos; exports.sendTaskRunningStatus = function (driver, taskId) { diff --git a/nodejs/src/rendler.js b/nodejs/src/rendler.js index bd03f1f..29353a8 100644 --- a/nodejs/src/rendler.js +++ b/nodejs/src/rendler.js @@ -1,5 +1,5 @@ var Arguments = require("./arguments"); -const MesosApi = require("mesos-api")(0); +const MesosApi = require("mesos-api"); const Protos = MesosApi.protos.mesos; const RendlerScheduler = require("./rendlerScheduler"); const CrawlExecutor = require("./executors/crawlExecutor"); diff --git a/nodejs/src/rendlerScheduler.js b/nodejs/src/rendlerScheduler.js index bc98ffb..ac4fe3a 100644 --- a/nodejs/src/rendlerScheduler.js +++ b/nodejs/src/rendlerScheduler.js @@ -1,9 +1,10 @@ const util = require('util'); const path = require('path'); -const MesosApi = require('mesos-api')(0); +const MesosApi = require('mesos-api'); const Protos = MesosApi.protos.mesos; const EventEmitter = require('events'); const ByteBuffer = require('bytebuffer'); +const dotUtil = require('./dotUtil'); const MaxTasksToRun = 256; // limit for demonstration purpose const RenderCpus = 1; @@ -73,7 +74,8 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { console.log("Reached the max number of tasks to run. 
Stopping..."); var dotWritePath = path.join(outputDir, "result.dot"); - //TODO: write DOT file + dotUtil.write(dotWritePath, _crawlResults, _renderResults); + driver.stop(); } } @@ -104,8 +106,7 @@ function RendlerScheduler(startUrl, outputDir, runAsUser) { // empty edge list for links links.forEach(function (link) { - if (!(link in _crawlResults)) - _crawlResults[link] = []; + _crawlResults[link] = _crawlResults[link] || []; }); break; case "RenderResult": From 3e90adc90bee30e107051c8b41219bf5486ab5bf Mon Sep 17 00:00:00 2001 From: bcrusu Date: Wed, 11 Nov 2015 16:59:44 +0200 Subject: [PATCH 13/14] Create README.md --- nodejs/README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 nodejs/README.md diff --git a/nodejs/README.md b/nodejs/README.md new file mode 100644 index 0000000..f5622e8 --- /dev/null +++ b/nodejs/README.md @@ -0,0 +1,14 @@ +## Node.js Rendler Framework + +### Preparation + +1. The implementation uses the [mesos-api](https://github.com/bcrusu/mesos-node) package, which has to be linked manually using the [npm-link](https://docs.npmjs.com/cli/link) command (npmjs.org package to come). +2. Pack the Rendler binaries to `rendler.tar.gz` and place the archive inside the [frameworks_home](http://mesos.apache.org/documentation/latest/configuration/) directory. 
+ +### Running + +To start the Rendler framework, run the following command: +```bash +node index.js -scheduler -master=MASTER_ADDRESS -output=RENDLER_OUTPUT_DIR [-starturl=CRAWL_START_URL] [-user=RUN_AS_USER] +``` + From 33ade1938d0a8f3fcb3d4447c12d8958cfe93437 Mon Sep 17 00:00:00 2001 From: bcrusu Date: Sun, 22 May 2016 23:07:46 +0300 Subject: [PATCH 14/14] converted C# implementation to .NET Core RC2 --- csharp/.gitignore | 11 +- csharp/NuGet.Config | 9 + csharp/Rendler.sln | 22 +- csharp/Rendler/Executors/CrawlExecutor.cs | 93 --- csharp/Rendler/JsonHelper.cs | 27 - csharp/Rendler/Properties/AssemblyInfo.cs | 36 -- csharp/Rendler/Rendler.csproj | 87 --- csharp/Rendler/packages.config | 4 - csharp/dev_nuget_feed/readme | 1 + csharp/ext/readme | 3 - csharp/global.json | 6 + csharp/{ => src}/Rendler/DotHelper.cs | 130 ++-- csharp/src/Rendler/Executors/CrawlExecutor.cs | 106 ++++ .../Rendler/Executors/ExecutorBase.cs | 84 +-- .../Executors/Messages/CrawlResultMessage.cs | 28 +- .../Rendler/Executors/Messages/Message.cs | 28 +- .../Executors/Messages/RenderResultMessage.cs | 28 +- .../Rendler/Executors/RenderExecutor.cs | 154 ++--- csharp/src/Rendler/JsonHelper.cs | 20 + csharp/{ => src}/Rendler/MesosExtensions.cs | 89 +-- csharp/{ => src}/Rendler/Program.cs | 142 ++--- csharp/{ => src}/Rendler/ProgramArguments.cs | 288 ++++----- csharp/src/Rendler/Properties/AssemblyInfo.cs | 16 + csharp/src/Rendler/Rendler.xproj | 19 + csharp/{ => src}/Rendler/RendlerScheduler.cs | 582 +++++++++--------- csharp/{ => src}/Rendler/RunMode.cs | 18 +- csharp/src/Rendler/project.json | 23 + 27 files changed, 1007 insertions(+), 1047 deletions(-) create mode 100644 csharp/NuGet.Config delete mode 100644 csharp/Rendler/Executors/CrawlExecutor.cs delete mode 100644 csharp/Rendler/JsonHelper.cs delete mode 100644 csharp/Rendler/Properties/AssemblyInfo.cs delete mode 100644 csharp/Rendler/Rendler.csproj delete mode 100644 csharp/Rendler/packages.config create mode 100644 
csharp/dev_nuget_feed/readme delete mode 100644 csharp/ext/readme create mode 100644 csharp/global.json rename csharp/{ => src}/Rendler/DotHelper.cs (97%) create mode 100644 csharp/src/Rendler/Executors/CrawlExecutor.cs rename csharp/{ => src}/Rendler/Executors/ExecutorBase.cs (96%) rename csharp/{ => src}/Rendler/Executors/Messages/CrawlResultMessage.cs (95%) rename csharp/{ => src}/Rendler/Executors/Messages/Message.cs (94%) rename csharp/{ => src}/Rendler/Executors/Messages/RenderResultMessage.cs (95%) rename csharp/{ => src}/Rendler/Executors/RenderExecutor.cs (94%) create mode 100644 csharp/src/Rendler/JsonHelper.cs rename csharp/{ => src}/Rendler/MesosExtensions.cs (91%) rename csharp/{ => src}/Rendler/Program.cs (96%) rename csharp/{ => src}/Rendler/ProgramArguments.cs (96%) create mode 100644 csharp/src/Rendler/Properties/AssemblyInfo.cs create mode 100644 csharp/src/Rendler/Rendler.xproj rename csharp/{ => src}/Rendler/RendlerScheduler.cs (97%) rename csharp/{ => src}/Rendler/RunMode.cs (92%) create mode 100644 csharp/src/Rendler/project.json diff --git a/csharp/.gitignore b/csharp/.gitignore index 3773b92..7e8b49f 100644 --- a/csharp/.gitignore +++ b/csharp/.gitignore @@ -3,9 +3,8 @@ *.sdf *.userprefs .vs -packages -Rendler/bin -Rendler/obj -ext/mesosclr.dll -ext/libmesosclr.so - +bin +obj +*.so +*.nupkg +*.lock.json diff --git a/csharp/NuGet.Config b/csharp/NuGet.Config new file mode 100644 index 0000000..a58fa55 --- /dev/null +++ b/csharp/NuGet.Config @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/csharp/Rendler.sln b/csharp/Rendler.sln index 1466f8f..aed6a02 100644 --- a/csharp/Rendler.sln +++ b/csharp/Rendler.sln @@ -1,9 +1,16 @@  Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 14 -VisualStudioVersion = 14.0.23107.0 +VisualStudioVersion = 14.0.25123.0 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Rendler", "Rendler\Rendler.csproj", 
"{493B8B8C-97CF-4C2C-9276-E553C0CB5E88}" +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{5D5A1D30-E747-47E8-8C4F-AAC48FAFA9C4}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{1B43699B-D07A-403E-BA95-8F1EB4E4A590}" + ProjectSection(SolutionItems) = preProject + global.json = global.json + EndProjectSection +EndProject +Project("{8BB2217D-0F2D-49D1-97BC-3654ED321F3B}") = "Rendler", "src\Rendler\Rendler.xproj", "{36A62F5A-1F76-494D-9377-2595AE03C598}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -11,12 +18,15 @@ Global Release|Any CPU = Release|Any CPU EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution - {493B8B8C-97CF-4C2C-9276-E553C0CB5E88}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {493B8B8C-97CF-4C2C-9276-E553C0CB5E88}.Debug|Any CPU.Build.0 = Debug|Any CPU - {493B8B8C-97CF-4C2C-9276-E553C0CB5E88}.Release|Any CPU.ActiveCfg = Release|Any CPU - {493B8B8C-97CF-4C2C-9276-E553C0CB5E88}.Release|Any CPU.Build.0 = Release|Any CPU + {36A62F5A-1F76-494D-9377-2595AE03C598}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {36A62F5A-1F76-494D-9377-2595AE03C598}.Debug|Any CPU.Build.0 = Debug|Any CPU + {36A62F5A-1F76-494D-9377-2595AE03C598}.Release|Any CPU.ActiveCfg = Release|Any CPU + {36A62F5A-1F76-494D-9377-2595AE03C598}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {36A62F5A-1F76-494D-9377-2595AE03C598} = {5D5A1D30-E747-47E8-8C4F-AAC48FAFA9C4} + EndGlobalSection EndGlobal diff --git a/csharp/Rendler/Executors/CrawlExecutor.cs b/csharp/Rendler/Executors/CrawlExecutor.cs deleted file mode 100644 index 8562706..0000000 --- a/csharp/Rendler/Executors/CrawlExecutor.cs +++ /dev/null @@ -1,93 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Net; -using 
System.Text; -using System.Text.RegularExpressions; -using System.Threading.Tasks; -using mesos; -using mesosclr; -using Rendler.Executors.Messages; - -namespace Rendler.Executors -{ - internal class CrawlExecutor : ExecutorBase - { - private static readonly Regex ExtractLinksRegex = new Regex ("]+href=[\"']?(?[^\"'>]+)[\"']?[^>]*>(.+?)", RegexOptions.Compiled | RegexOptions.IgnoreCase); - - public override void Registered (IExecutorDriver driver, ExecutorInfo executorInfo, FrameworkInfo frameworkInfo, SlaveInfo slaveInfo) - { - Console.WriteLine ($"Registered executor on '{slaveInfo.hostname}'."); - } - - public override void LaunchTask (IExecutorDriver driver, TaskInfo taskInfo) - { - Console.WriteLine ($"Launching crawl task '{taskInfo.task_id.value}'..."); - - Task.Factory.StartNew (() => { - try { - RunTask (driver, taskInfo); - } catch (Exception e) { - Console.WriteLine ($"Exception during crawl operation: {e}"); - driver.SendTaskErrorStatus (taskInfo.task_id); - } - }); - } - - private static void RunTask (IExecutorDriver driver, TaskInfo taskInfo) - { - driver.SendTaskRunningStatus (taskInfo.task_id); - - var url = Encoding.UTF8.GetString (taskInfo.data); - - var htmlContent = GetUrlContent (url); - if (htmlContent != null) { - var links = ExtractLinks (htmlContent); - links = links - .Select (x => x.ToLower ()) - .Distinct (StringComparer.CurrentCultureIgnoreCase); - - if (links.Any ()) - SendCrawlResultMessage (driver, url, links.ToArray ()); - } - - driver.SendTaskFinishedStatus (taskInfo.task_id); - } - - private static IEnumerable ExtractLinks (string htmlContent) - { - var match = ExtractLinksRegex.Match (htmlContent); - while (match.Success) { - yield return match.Groups ["link"].Value.Trim (); - match = match.NextMatch (); - } - } - - private static string GetUrlContent (string url) - { - using (var client = new WebClient ()) { - client.Headers.Add ("X-PoweredBy: minions"); - - try { - return client.DownloadString (url); - } catch (WebException e) 
{ - Console.WriteLine ($"Error fetching url '{url}'; Error: {e}"); - return null; - } - } - } - - private static void SendCrawlResultMessage (IExecutorDriver driver, string url, string[] links) - { - var message = new Message { - Type = "CrawlResult", - Body = JsonHelper.Serialize (new CrawlResultMessage { - Url = url, - Links = links - }) - }; - - driver.SendFrameworkMessage (JsonHelper.Serialize (message)); - } - } -} diff --git a/csharp/Rendler/JsonHelper.cs b/csharp/Rendler/JsonHelper.cs deleted file mode 100644 index 8a18b7f..0000000 --- a/csharp/Rendler/JsonHelper.cs +++ /dev/null @@ -1,27 +0,0 @@ -using System.IO; -using System.Runtime.Serialization.Json; - -namespace Rendler -{ - internal static class JsonHelper - { - public static byte[] Serialize(object obj) - { - var dcs = new DataContractJsonSerializer(obj.GetType()); - using (var ms = new MemoryStream()) - { - dcs.WriteObject(ms, obj); - return ms.ToArray(); - } - } - - public static T Deserialize(byte[] bytes) - { - var dcs = new DataContractJsonSerializer(typeof(T)); - using (var ms = new MemoryStream(bytes)) - { - return (T)dcs.ReadObject(ms); - } - } - } -} diff --git a/csharp/Rendler/Properties/AssemblyInfo.cs b/csharp/Rendler/Properties/AssemblyInfo.cs deleted file mode 100644 index 6315421..0000000 --- a/csharp/Rendler/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,36 +0,0 @@ -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. 
-[assembly: AssemblyTitle("mesosclr.Rendler")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("IBM")] -[assembly: AssemblyProduct("mesosclr.Rendler")] -[assembly: AssemblyCopyright("Copyright © IBM 2015")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("493b8b8c-97cf-4c2c-9276-e553c0cb5e88")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/csharp/Rendler/Rendler.csproj b/csharp/Rendler/Rendler.csproj deleted file mode 100644 index 5e76707..0000000 --- a/csharp/Rendler/Rendler.csproj +++ /dev/null @@ -1,87 +0,0 @@ - - - - - Debug - AnyCPU - {493B8B8C-97CF-4C2C-9276-E553C0CB5E88} - Exe - Properties - Rendler - rendler - v4.5 - 512 - true - - - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - false - 6 - - - AnyCPU - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - 6 - - - - - - - - - ..\packages\protobuf-net.2.0.0.668\lib\net40\protobuf-net.dll - - - ..\ext\mesosclr.dll - - - - - - - - - - - - - - - - - - - - - - - - render.js - PreserveNewest - - - libmesosclr.so - PreserveNewest - - - - - diff --git a/csharp/Rendler/packages.config b/csharp/Rendler/packages.config deleted file mode 100644 index 3c1695b..0000000 --- a/csharp/Rendler/packages.config +++ /dev/null @@ -1,4 
+0,0 @@ - - - - \ No newline at end of file diff --git a/csharp/dev_nuget_feed/readme b/csharp/dev_nuget_feed/readme new file mode 100644 index 0000000..0f775f1 --- /dev/null +++ b/csharp/dev_nuget_feed/readme @@ -0,0 +1 @@ +place 'mesosclr.x.x.x.nupkg' in this directory diff --git a/csharp/ext/readme b/csharp/ext/readme deleted file mode 100644 index b78a6bd..0000000 --- a/csharp/ext/readme +++ /dev/null @@ -1,3 +0,0 @@ -directory containing the mesos-clr binaries required to build Rendler -- mesosclr.dll -- libmesosclr.so diff --git a/csharp/global.json b/csharp/global.json new file mode 100644 index 0000000..b51e28b --- /dev/null +++ b/csharp/global.json @@ -0,0 +1,6 @@ +{ + "projects": [ "src", "test" ], + "sdk": { + "version": "1.0.0-preview1-002702" + } +} diff --git a/csharp/Rendler/DotHelper.cs b/csharp/src/Rendler/DotHelper.cs similarity index 97% rename from csharp/Rendler/DotHelper.cs rename to csharp/src/Rendler/DotHelper.cs index fc18850..5835e7b 100644 --- a/csharp/Rendler/DotHelper.cs +++ b/csharp/src/Rendler/DotHelper.cs @@ -1,65 +1,65 @@ -using System.Collections.Generic; -using System.IO; -using System.Text; - -namespace Rendler -{ - internal static class DotHelper - { - public static void Write(string outputPath, IDictionary> nodeToChildNodes, - IDictionary nodeImageFileName) - { - var nodeNames = new Dictionary(); - var nodeIdCounter = 0; - - using (var fs = new FileStream(outputPath, FileMode.CreateNew, FileAccess.Write, FileShare.Write)) - using (var writer = new StreamWriter(fs, Encoding.UTF8)) - { - writer.WriteLine("digraph G {"); - writer.WriteLine("\tnode [shape=box];"); - - foreach (var node in nodeToChildNodes) - { - var url = node.Key; - var nodeName = "url_" + (++nodeIdCounter); - nodeNames[url] = nodeName; - - writer.Write("\t"); - writer.Write(nodeName); - - string imageFileName; - if (nodeImageFileName.TryGetValue(url, out imageFileName)) - { - writer.Write(" [label=\"\" image=\""); - writer.Write(imageFileName); - } - else - { - 
writer.Write(" [label=\""); - writer.Write(url); - } - - writer.WriteLine("\"];"); - } - - writer.WriteLine(); - - foreach (var node in nodeToChildNodes) - { - var nodeName = nodeNames[node.Key]; - foreach (var childNode in node.Value) - { - var childNodeName = nodeNames[childNode]; - writer.Write("\t"); - writer.Write(nodeName); - writer.Write(" -> "); - writer.Write(childNodeName); - writer.WriteLine(";"); - } - } - - writer.WriteLine("}"); - } - } - } -} +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace Rendler +{ + internal static class DotHelper + { + public static void Write(string outputPath, IDictionary> nodeToChildNodes, + IDictionary nodeImageFileName) + { + var nodeNames = new Dictionary(); + var nodeIdCounter = 0; + + using (var fs = new FileStream(outputPath, FileMode.CreateNew, FileAccess.Write, FileShare.Write)) + using (var writer = new StreamWriter(fs, Encoding.UTF8)) + { + writer.WriteLine("digraph G {"); + writer.WriteLine("\tnode [shape=box];"); + + foreach (var node in nodeToChildNodes) + { + var url = node.Key; + var nodeName = "url_" + (++nodeIdCounter); + nodeNames[url] = nodeName; + + writer.Write("\t"); + writer.Write(nodeName); + + string imageFileName; + if (nodeImageFileName.TryGetValue(url, out imageFileName)) + { + writer.Write(" [label=\"\" image=\""); + writer.Write(imageFileName); + } + else + { + writer.Write(" [label=\""); + writer.Write(url); + } + + writer.WriteLine("\"];"); + } + + writer.WriteLine(); + + foreach (var node in nodeToChildNodes) + { + var nodeName = nodeNames[node.Key]; + foreach (var childNode in node.Value) + { + var childNodeName = nodeNames[childNode]; + writer.Write("\t"); + writer.Write(nodeName); + writer.Write(" -> "); + writer.Write(childNodeName); + writer.WriteLine(";"); + } + } + + writer.WriteLine("}"); + } + } + } +} diff --git a/csharp/src/Rendler/Executors/CrawlExecutor.cs b/csharp/src/Rendler/Executors/CrawlExecutor.cs new file mode 100644 index 
0000000..d79bcd9 --- /dev/null +++ b/csharp/src/Rendler/Executors/CrawlExecutor.cs @@ -0,0 +1,106 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Net; +using System.Text; +using System.Text.RegularExpressions; +using System.Threading.Tasks; +using mesos; +using mesosclr; +using Rendler.Executors.Messages; +using System.Net.Http; + +namespace Rendler.Executors +{ + internal class CrawlExecutor : ExecutorBase + { + private static readonly Regex ExtractLinksRegex = new Regex("]+href=[\"']?(?[^\"'>]+)[\"']?[^>]*>(.+?)", RegexOptions.Compiled | RegexOptions.IgnoreCase); + + public override void Registered(IExecutorDriver driver, ExecutorInfo executorInfo, FrameworkInfo frameworkInfo, SlaveInfo slaveInfo) + { + Console.WriteLine($"Registered executor on '{slaveInfo.hostname}'."); + } + + public override void LaunchTask(IExecutorDriver driver, TaskInfo taskInfo) + { + Console.WriteLine($"Launching crawl task '{taskInfo.task_id.value}'..."); + + Task.Factory.StartNew(async () => + { + try + { + await RunTask(driver, taskInfo); + } + catch (Exception e) + { + Console.WriteLine($"Exception during crawl operation: {e}"); + driver.SendTaskErrorStatus(taskInfo.task_id); + } + }); + } + + private static async Task RunTask(IExecutorDriver driver, TaskInfo taskInfo) + { + driver.SendTaskRunningStatus(taskInfo.task_id); + + var url = Encoding.UTF8.GetString(taskInfo.data); + + var htmlContent = await GetUrlContent(url); + if (htmlContent != null) + { + var links = ExtractLinks(htmlContent); + links = links + .Select(x => x.ToLower()) + .Distinct(StringComparer.CurrentCultureIgnoreCase); + + if (links.Any()) + SendCrawlResultMessage(driver, url, links.ToArray()); + } + + driver.SendTaskFinishedStatus(taskInfo.task_id); + } + + private static IEnumerable ExtractLinks(string htmlContent) + { + var match = ExtractLinksRegex.Match(htmlContent); + while (match.Success) + { + yield return match.Groups["link"].Value.Trim(); + match = 
match.NextMatch(); + } + } + + private static async Task GetUrlContent(string url) + { + using (var client = new HttpClient()) + { + client.DefaultRequestHeaders.Add("X-PoweredBy", "minions"); + + try + { + return await client.GetStringAsync(url); + } + catch (WebException e) + { + Console.WriteLine($"Error fetching url '{url}'; Error: {e}"); + return null; + } + } + } + + private static void SendCrawlResultMessage(IExecutorDriver driver, string url, string[] links) + { + var message = new Message + { + Type = "CrawlResult", + Body = JsonHelper.Serialize(new CrawlResultMessage + { + Url = url, + Links = links + }) + }; + + driver.SendFrameworkMessage(JsonHelper.Serialize(message)); + } + } +} diff --git a/csharp/Rendler/Executors/ExecutorBase.cs b/csharp/src/Rendler/Executors/ExecutorBase.cs similarity index 96% rename from csharp/Rendler/Executors/ExecutorBase.cs rename to csharp/src/Rendler/Executors/ExecutorBase.cs index d8329fb..9768d03 100644 --- a/csharp/Rendler/Executors/ExecutorBase.cs +++ b/csharp/src/Rendler/Executors/ExecutorBase.cs @@ -1,42 +1,42 @@ -using System; -using mesos; -using mesosclr; - -namespace Rendler.Executors -{ - abstract class ExecutorBase : IExecutor - { - public virtual void Registered(IExecutorDriver driver, ExecutorInfo executorInfo, FrameworkInfo frameworkInfo, SlaveInfo slaveInfo) - { - } - - public virtual void Reregistered(IExecutorDriver driver, SlaveInfo slaveInfo) - { - } - - public virtual void Disconnected(IExecutorDriver driver) - { - } - - public virtual void LaunchTask(IExecutorDriver driver, TaskInfo taskInfo) - { - } - - public virtual void KillTask(IExecutorDriver driver, TaskID taskId) - { - } - - public virtual void FrameworkMessage(IExecutorDriver driver, byte[] data) - { - } - - public virtual void Shutdown(IExecutorDriver driver) - { - } - - public virtual void Error(IExecutorDriver driver, string message) - { - Console.WriteLine($"Error: '{message}'."); - } - } -} +using System; +using mesos; +using mesosclr; + 
+namespace Rendler.Executors +{ + abstract class ExecutorBase : IExecutor + { + public virtual void Registered(IExecutorDriver driver, ExecutorInfo executorInfo, FrameworkInfo frameworkInfo, SlaveInfo slaveInfo) + { + } + + public virtual void Reregistered(IExecutorDriver driver, SlaveInfo slaveInfo) + { + } + + public virtual void Disconnected(IExecutorDriver driver) + { + } + + public virtual void LaunchTask(IExecutorDriver driver, TaskInfo taskInfo) + { + } + + public virtual void KillTask(IExecutorDriver driver, TaskID taskId) + { + } + + public virtual void FrameworkMessage(IExecutorDriver driver, byte[] data) + { + } + + public virtual void Shutdown(IExecutorDriver driver) + { + } + + public virtual void Error(IExecutorDriver driver, string message) + { + Console.WriteLine($"Error: '{message}'."); + } + } +} diff --git a/csharp/Rendler/Executors/Messages/CrawlResultMessage.cs b/csharp/src/Rendler/Executors/Messages/CrawlResultMessage.cs similarity index 95% rename from csharp/Rendler/Executors/Messages/CrawlResultMessage.cs rename to csharp/src/Rendler/Executors/Messages/CrawlResultMessage.cs index bdafe35..b87601f 100644 --- a/csharp/Rendler/Executors/Messages/CrawlResultMessage.cs +++ b/csharp/src/Rendler/Executors/Messages/CrawlResultMessage.cs @@ -1,14 +1,14 @@ -using System.Runtime.Serialization; - -namespace Rendler.Executors.Messages -{ - [DataContract] - public class CrawlResultMessage - { - [DataMember] - public string Url { get; set; } - - [DataMember] - public string[] Links { get; set; } - } -} +using System.Runtime.Serialization; + +namespace Rendler.Executors.Messages +{ + [DataContract] + public class CrawlResultMessage + { + [DataMember] + public string Url { get; set; } + + [DataMember] + public string[] Links { get; set; } + } +} diff --git a/csharp/Rendler/Executors/Messages/Message.cs b/csharp/src/Rendler/Executors/Messages/Message.cs similarity index 94% rename from csharp/Rendler/Executors/Messages/Message.cs rename to 
csharp/src/Rendler/Executors/Messages/Message.cs index 5bbe425..650fa80 100644 --- a/csharp/Rendler/Executors/Messages/Message.cs +++ b/csharp/src/Rendler/Executors/Messages/Message.cs @@ -1,14 +1,14 @@ -using System.Runtime.Serialization; - -namespace Rendler.Executors.Messages -{ - [DataContract] - internal class Message - { - [DataMember] - public string Type { get; set; } - - [DataMember] - public byte[] Body { get; set; } - } -} +using System.Runtime.Serialization; + +namespace Rendler.Executors.Messages +{ + [DataContract] + internal class Message + { + [DataMember] + public string Type { get; set; } + + [DataMember] + public byte[] Body { get; set; } + } +} diff --git a/csharp/Rendler/Executors/Messages/RenderResultMessage.cs b/csharp/src/Rendler/Executors/Messages/RenderResultMessage.cs similarity index 95% rename from csharp/Rendler/Executors/Messages/RenderResultMessage.cs rename to csharp/src/Rendler/Executors/Messages/RenderResultMessage.cs index a652fba..6e5f0fe 100644 --- a/csharp/Rendler/Executors/Messages/RenderResultMessage.cs +++ b/csharp/src/Rendler/Executors/Messages/RenderResultMessage.cs @@ -1,14 +1,14 @@ -using System.Runtime.Serialization; - -namespace Rendler.Executors.Messages -{ - [DataContract] - public class RenderResultMessage - { - [DataMember] - public string Url { get; set; } - - [DataMember] - public string FileName { get; set; } - } -} +using System.Runtime.Serialization; + +namespace Rendler.Executors.Messages +{ + [DataContract] + public class RenderResultMessage + { + [DataMember] + public string Url { get; set; } + + [DataMember] + public string FileName { get; set; } + } +} diff --git a/csharp/Rendler/Executors/RenderExecutor.cs b/csharp/src/Rendler/Executors/RenderExecutor.cs similarity index 94% rename from csharp/Rendler/Executors/RenderExecutor.cs rename to csharp/src/Rendler/Executors/RenderExecutor.cs index 976835e..dd9bfa5 100644 --- a/csharp/Rendler/Executors/RenderExecutor.cs +++ 
b/csharp/src/Rendler/Executors/RenderExecutor.cs @@ -1,77 +1,77 @@ -using System; -using System.Diagnostics; -using System.IO; -using System.Text; -using System.Threading.Tasks; -using mesos; -using mesosclr; -using Rendler.Executors.Messages; - -namespace Rendler.Executors -{ - class RenderExecutor : ExecutorBase - { - private string _outputDir; - - public override void Registered(IExecutorDriver driver, ExecutorInfo executorInfo, FrameworkInfo frameworkInfo, SlaveInfo slaveInfo) - { - _outputDir = Encoding.UTF8.GetString (executorInfo.data); - Console.WriteLine ($"Registered executor on host '{slaveInfo.hostname}'. Output dir is '{_outputDir}'."); - } - - public override void LaunchTask(IExecutorDriver driver, TaskInfo taskInfo) - { - Console.WriteLine($"Launching render task '{taskInfo.task_id.value}'..."); - - Task.Factory.StartNew (() => { - try { - RunTask (driver, taskInfo); - } catch (Exception e) { - Console.WriteLine ($"Exception during render operation: {e}"); - driver.SendTaskErrorStatus (taskInfo.task_id); - } - }); - } - - private void RunTask(IExecutorDriver driver, TaskInfo taskInfo) - { - driver.SendTaskRunningStatus(taskInfo.task_id); - - var url = Encoding.UTF8.GetString(taskInfo.data); - var imageFileName = RunRendering(taskInfo.task_id, url); - - SendRenderResultMessage(driver, url, imageFileName); - driver.SendTaskFinishedStatus(taskInfo.task_id); - } - - private string RunRendering(TaskID taskId, string url) - { - var imagePath = Path.Combine(_outputDir, $"{taskId.value}.png"); - - var startInfo = new ProcessStartInfo("phantomjs"); - startInfo.Arguments = $"render.js \"{url}\" \"{imagePath}\""; - startInfo.WindowStyle = ProcessWindowStyle.Hidden; - startInfo.UseShellExecute = false; - - var process = Process.Start(startInfo); - process.WaitForExit(); - - return imagePath; - } - - private static void SendRenderResultMessage(IExecutorDriver driver, string url, string fileName) - { - var message = new Message - { - Type = "RenderResult", - Body 
= JsonHelper.Serialize(new RenderResultMessage - { - Url = url, - FileName = fileName - }) - }; - - driver.SendFrameworkMessage(JsonHelper.Serialize(message)); - } - } -} +using System; +using System.Diagnostics; +using System.IO; +using System.Text; +using System.Threading.Tasks; +using mesos; +using mesosclr; +using Rendler.Executors.Messages; + +namespace Rendler.Executors +{ + class RenderExecutor : ExecutorBase + { + private string _outputDir; + + public override void Registered(IExecutorDriver driver, ExecutorInfo executorInfo, FrameworkInfo frameworkInfo, SlaveInfo slaveInfo) + { + _outputDir = Encoding.UTF8.GetString (executorInfo.data); + Console.WriteLine ($"Registered executor on host '{slaveInfo.hostname}'. Output dir is '{_outputDir}'."); + } + + public override void LaunchTask(IExecutorDriver driver, TaskInfo taskInfo) + { + Console.WriteLine($"Launching render task '{taskInfo.task_id.value}'..."); + + Task.Factory.StartNew (() => { + try { + RunTask (driver, taskInfo); + } catch (Exception e) { + Console.WriteLine ($"Exception during render operation: {e}"); + driver.SendTaskErrorStatus (taskInfo.task_id); + } + }); + } + + private void RunTask(IExecutorDriver driver, TaskInfo taskInfo) + { + driver.SendTaskRunningStatus(taskInfo.task_id); + + var url = Encoding.UTF8.GetString(taskInfo.data); + var imageFileName = RunRendering(taskInfo.task_id, url); + + SendRenderResultMessage(driver, url, imageFileName); + driver.SendTaskFinishedStatus(taskInfo.task_id); + } + + private string RunRendering(TaskID taskId, string url) + { + var imagePath = Path.Combine(_outputDir, $"{taskId.value}.png"); + + var startInfo = new ProcessStartInfo("phantomjs"); + startInfo.Arguments = $"render.js \"{url}\" \"{imagePath}\""; + startInfo.CreateNoWindow = true; + startInfo.UseShellExecute = false; + + var process = Process.Start(startInfo); + process.WaitForExit(); + + return imagePath; + } + + private static void SendRenderResultMessage(IExecutorDriver driver, string url, 
string fileName) + { + var message = new Message + { + Type = "RenderResult", + Body = JsonHelper.Serialize(new RenderResultMessage + { + Url = url, + FileName = fileName + }) + }; + + driver.SendFrameworkMessage(JsonHelper.Serialize(message)); + } + } +} diff --git a/csharp/src/Rendler/JsonHelper.cs b/csharp/src/Rendler/JsonHelper.cs new file mode 100644 index 0000000..9c6e93f --- /dev/null +++ b/csharp/src/Rendler/JsonHelper.cs @@ -0,0 +1,20 @@ +using Newtonsoft.Json; +using System.Text; + +namespace Rendler +{ + internal static class JsonHelper + { + public static byte[] Serialize(object obj) + { + var str = JsonConvert.SerializeObject(obj); + return Encoding.UTF8.GetBytes(str); + } + + public static T Deserialize(byte[] bytes) + { + var str = Encoding.UTF8.GetString(bytes); + return JsonConvert.DeserializeObject(str); + } + } +} diff --git a/csharp/Rendler/MesosExtensions.cs b/csharp/src/Rendler/MesosExtensions.cs similarity index 91% rename from csharp/Rendler/MesosExtensions.cs rename to csharp/src/Rendler/MesosExtensions.cs index bb1f8b0..de09c4b 100644 --- a/csharp/Rendler/MesosExtensions.cs +++ b/csharp/src/Rendler/MesosExtensions.cs @@ -1,44 +1,45 @@ -using mesos; -using mesosclr; - -namespace Rendler -{ - internal static class MesosExtensions - { - public static void SendTaskRunningStatus(this IExecutorDriver driver, TaskID taskId) - { - driver.SendStatusUpdate(new TaskStatus - { - task_id = taskId, - state = TaskState.TASK_RUNNING - }); - } - - public static void SendTaskFinishedStatus(this IExecutorDriver driver, TaskID taskId) - { - driver.SendStatusUpdate(new TaskStatus - { - task_id = taskId, - state = TaskState.TASK_FINISHED - }); - } - - public static void SendTaskErrorStatus(this IExecutorDriver driver, TaskID taskId) - { - driver.SendStatusUpdate(new TaskStatus - { - task_id = taskId, - state = TaskState.TASK_ERROR - }); - } - - public static bool IsTerminal(this TaskState state) - { - return state == TaskState.TASK_FINISHED || - state == 
TaskState.TASK_FAILED || - state == TaskState.TASK_KILLED || - state == TaskState.TASK_LOST || - state == TaskState.TASK_ERROR; - } - } -} +using mesos; +using mesosclr; + +namespace Rendler +{ + internal static class MesosExtensions + { + public static void SendTaskRunningStatus(this IExecutorDriver driver, TaskID taskId) + { + driver.SendStatusUpdate(new TaskStatus + { + task_id = taskId, + state = TaskState.TASK_RUNNING + }); + } + + public static void SendTaskFinishedStatus(this IExecutorDriver driver, TaskID taskId) + { + driver.SendStatusUpdate(new TaskStatus + { + task_id = taskId, + state = TaskState.TASK_FINISHED + }); + } + + public static void SendTaskErrorStatus(this IExecutorDriver driver, TaskID taskId) + { + driver.SendStatusUpdate(new TaskStatus + { + task_id = taskId, + state = TaskState.TASK_ERROR + }); + } + + public static bool IsTerminal(this TaskState state) + { + return state == TaskState.TASK_FINISHED || + state == TaskState.TASK_FAILED || + state == TaskState.TASK_KILLED || + state == TaskState.TASK_LOST || + state == TaskState.TASK_ERROR || + state == TaskState.TASK_KILLING; + } + } +} diff --git a/csharp/Rendler/Program.cs b/csharp/src/Rendler/Program.cs similarity index 96% rename from csharp/Rendler/Program.cs rename to csharp/src/Rendler/Program.cs index 3832f4d..fcb9614 100644 --- a/csharp/Rendler/Program.cs +++ b/csharp/src/Rendler/Program.cs @@ -1,71 +1,71 @@ -using System; -using mesos; -using mesosclr; -using Rendler.Executors; - -namespace Rendler -{ - class Program - { - static int Main(string[] args) - { - var arguments = Arguments.Parse (args); - if (arguments == null || !arguments.Validate ()) - return -1; - - switch (arguments.RunMode) { - case RunMode.Scheduler: - return RunScheduler (arguments.MesosMaster, arguments.StartUrl, arguments.OutputDir, arguments.RunAsUser); - case RunMode.Executor: - return RunExecutor (arguments.ExecutorName); - default: - return -1; - } - } - - private static int RunScheduler(string 
mesosMaster, string startUrl, string outputDir, string runAsUser) - { - var frameworkInfo = new FrameworkInfo { - id = new FrameworkID { - value = "Rendler" - }, - name = "Rendler (C#)", - failover_timeout = 5, //seconds - checkpoint = false, - user = runAsUser - }; - - var scheduler = new RendlerScheduler(startUrl ?? "https://mesosphere.com", outputDir, runAsUser); - var driver = new MesosSchedulerDriver(scheduler, frameworkInfo, mesosMaster); - - Console.WriteLine ("Running driver..."); - var result = driver.Run() == Status.DRIVER_STOPPED ? 0 : 1; - Console.WriteLine ($"Driver finished with status {result}."); - - return result; - } - - private static int RunExecutor(string executorName) - { - IExecutor executor; - - switch (executorName) - { - case "render": - executor = new RenderExecutor(); - break; - case "crawl": - executor = new CrawlExecutor(); - break; - default: - { - Console.WriteLine($"Invlaid executor provided: '{executorName}'."); - return -2; - } - } - - var driver = new MesosExecutorDriver(executor); - return driver.Run() == Status.DRIVER_STOPPED ? 0 : 1; - } - } -} +using System; +using mesos; +using mesosclr; +using Rendler.Executors; + +namespace Rendler +{ + class Program + { + static int Main(string[] args) + { + var arguments = Arguments.Parse (args); + if (arguments == null || !arguments.Validate ()) + return -1; + + switch (arguments.RunMode) { + case RunMode.Scheduler: + return RunScheduler (arguments.MesosMaster, arguments.StartUrl, arguments.OutputDir, arguments.RunAsUser); + case RunMode.Executor: + return RunExecutor (arguments.ExecutorName); + default: + return -1; + } + } + + private static int RunScheduler(string mesosMaster, string startUrl, string outputDir, string runAsUser) + { + var frameworkInfo = new FrameworkInfo { + id = new FrameworkID { + value = "Rendler" + }, + name = "Rendler (C#)", + failover_timeout = 5, //seconds + checkpoint = false, + user = runAsUser + }; + + var scheduler = new RendlerScheduler(startUrl ?? 
"https://mesosphere.com", outputDir, runAsUser); + var driver = new MesosSchedulerDriver(scheduler, frameworkInfo, mesosMaster); + + Console.WriteLine ("Running driver..."); + var result = driver.Run() == Status.DRIVER_STOPPED ? 0 : 1; + Console.WriteLine ($"Driver finished with status {result}."); + + return result; + } + + private static int RunExecutor(string executorName) + { + IExecutor executor; + + switch (executorName) + { + case "render": + executor = new RenderExecutor(); + break; + case "crawl": + executor = new CrawlExecutor(); + break; + default: + { + Console.WriteLine($"Invlaid executor provided: '{executorName}'."); + return -2; + } + } + + var driver = new MesosExecutorDriver(executor); + return driver.Run() == Status.DRIVER_STOPPED ? 0 : 1; + } + } +} diff --git a/csharp/Rendler/ProgramArguments.cs b/csharp/src/Rendler/ProgramArguments.cs similarity index 96% rename from csharp/Rendler/ProgramArguments.cs rename to csharp/src/Rendler/ProgramArguments.cs index f0b58ad..f21334c 100644 --- a/csharp/Rendler/ProgramArguments.cs +++ b/csharp/src/Rendler/ProgramArguments.cs @@ -1,144 +1,144 @@ -using System; -using System.IO; - -namespace Rendler -{ - internal class Arguments - { - public RunMode RunMode { get; private set; } - - public string MesosMaster { get; private set; } - - public string ExecutorName { get; private set; } - - public string StartUrl { get; private set; } - - public string OutputDir { get; private set; } - - public string RunAsUser { get; private set; } - - public static Arguments Parse(string[] args) - { - var runMode = RunMode.Default; - string mesosMaster = null; - string executor = null; - string outputDir = null; - string startUrl = null; - string runAsUser = null; - - foreach (var arg in args) - { - if (arg.StartsWith("-executor=")) - { - if (runMode == RunMode.Executor) { - Console.WriteLine("Executor option can be specified only once."); - return null; - } - if (runMode == RunMode.Scheduler) - { - 
Console.WriteLine("Scheduler and Executor run modes are mutually exclusive."); - return null; - } - - executor = arg.Substring("-executor=".Length); - runMode = RunMode.Executor; - } - else if (arg.Equals("-scheduler")) - { - if (runMode == RunMode.Scheduler) { - Console.WriteLine("Scheduler option can be specified only once."); - return null; - } - if (runMode == RunMode.Executor) - { - Console.WriteLine("Scheduler and Executor run modes are mutually exclusive."); - return null; - } - - runMode = RunMode.Scheduler; - } - else if (arg.StartsWith("-master=")) - { - if (mesosMaster != null) { - Console.WriteLine("Mesos master option can be specified only once."); - return null; - } - - mesosMaster = arg.Substring("-master=".Length); - } - else if (arg.StartsWith("-output=")) - { - if (outputDir != null) { - Console.WriteLine("Output directory option can be specified only once."); - return null; - } - - outputDir = arg.Substring("-output=".Length); - } - else if (arg.StartsWith("-starturl=")) - { - if (startUrl != null) { - Console.WriteLine("Start URL option can be specified only once."); - return null; - } - - startUrl = arg.Substring("-starturl=".Length); - } - else if (arg.StartsWith("-user=")) - { - if (startUrl != null) { - Console.WriteLine("User option can be specified only once."); - return null; - } - - runAsUser = arg.Substring("-user=".Length); - } - else - { - Console.WriteLine($"Unknown argument detected: '{arg}'."); - } - } - - return new Arguments - { - RunMode = runMode, - ExecutorName = executor, - MesosMaster = mesosMaster, - OutputDir = outputDir, - StartUrl = startUrl, - RunAsUser = runAsUser - }; - } - - public bool Validate () - { - switch (RunMode) { - case RunMode.Executor: - if (string.IsNullOrWhiteSpace (ExecutorName)) { - Console.WriteLine ("Invalid executor name."); - return false; - } - break; - case RunMode.Scheduler: - if (string.IsNullOrWhiteSpace (MesosMaster)) { - Console.WriteLine ("Invalid Mesos master address."); - return false; - 
} - if (string.IsNullOrWhiteSpace (OutputDir)) { - Console.WriteLine ("Invalid output directory."); - return false; - } - if (!Directory.Exists(OutputDir)){ - Console.WriteLine ("Output directory does not exist."); - return false; - } - break; - default: - Console.WriteLine ("Run mode was not specified."); - return false; - } - - return true; - } - } -} +using System; +using System.IO; + +namespace Rendler +{ + internal class Arguments + { + public RunMode RunMode { get; private set; } + + public string MesosMaster { get; private set; } + + public string ExecutorName { get; private set; } + + public string StartUrl { get; private set; } + + public string OutputDir { get; private set; } + + public string RunAsUser { get; private set; } + + public static Arguments Parse(string[] args) + { + var runMode = RunMode.Default; + string mesosMaster = null; + string executor = null; + string outputDir = null; + string startUrl = null; + string runAsUser = null; + + foreach (var arg in args) + { + if (arg.StartsWith("-executor=")) + { + if (runMode == RunMode.Executor) { + Console.WriteLine("Executor option can be specified only once."); + return null; + } + if (runMode == RunMode.Scheduler) + { + Console.WriteLine("Scheduler and Executor run modes are mutually exclusive."); + return null; + } + + executor = arg.Substring("-executor=".Length); + runMode = RunMode.Executor; + } + else if (arg.Equals("-scheduler")) + { + if (runMode == RunMode.Scheduler) { + Console.WriteLine("Scheduler option can be specified only once."); + return null; + } + if (runMode == RunMode.Executor) + { + Console.WriteLine("Scheduler and Executor run modes are mutually exclusive."); + return null; + } + + runMode = RunMode.Scheduler; + } + else if (arg.StartsWith("-master=")) + { + if (mesosMaster != null) { + Console.WriteLine("Mesos master option can be specified only once."); + return null; + } + + mesosMaster = arg.Substring("-master=".Length); + } + else if (arg.StartsWith("-output=")) + { + if 
(outputDir != null) { + Console.WriteLine("Output directory option can be specified only once."); + return null; + } + + outputDir = arg.Substring("-output=".Length); + } + else if (arg.StartsWith("-starturl=")) + { + if (startUrl != null) { + Console.WriteLine("Start URL option can be specified only once."); + return null; + } + + startUrl = arg.Substring("-starturl=".Length); + } + else if (arg.StartsWith("-user=")) + { + if (startUrl != null) { + Console.WriteLine("User option can be specified only once."); + return null; + } + + runAsUser = arg.Substring("-user=".Length); + } + else + { + Console.WriteLine($"Unknown argument detected: '{arg}'."); + } + } + + return new Arguments + { + RunMode = runMode, + ExecutorName = executor, + MesosMaster = mesosMaster, + OutputDir = outputDir, + StartUrl = startUrl, + RunAsUser = runAsUser + }; + } + + public bool Validate () + { + switch (RunMode) { + case RunMode.Executor: + if (string.IsNullOrWhiteSpace (ExecutorName)) { + Console.WriteLine ("Invalid executor name."); + return false; + } + break; + case RunMode.Scheduler: + if (string.IsNullOrWhiteSpace (MesosMaster)) { + Console.WriteLine ("Invalid Mesos master address."); + return false; + } + if (string.IsNullOrWhiteSpace (OutputDir)) { + Console.WriteLine ("Invalid output directory."); + return false; + } + if (!Directory.Exists(OutputDir)){ + Console.WriteLine ("Output directory does not exist."); + return false; + } + break; + default: + Console.WriteLine ("Run mode was not specified."); + return false; + } + + return true; + } + } +} diff --git a/csharp/src/Rendler/Properties/AssemblyInfo.cs b/csharp/src/Rendler/Properties/AssemblyInfo.cs new file mode 100644 index 0000000..ad431e6 --- /dev/null +++ b/csharp/src/Rendler/Properties/AssemblyInfo.cs @@ -0,0 +1,16 @@ +using System.Reflection; +using System.Runtime.InteropServices; + +[assembly: AssemblyTitle("mesosclr.Rendler")] +[assembly: AssemblyDescription("")] +[assembly: ComVisible(false)] + +[assembly: 
AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("mesosclr.Rendler")] +[assembly: AssemblyCopyright("")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/csharp/src/Rendler/Rendler.xproj b/csharp/src/Rendler/Rendler.xproj new file mode 100644 index 0000000..cca238a --- /dev/null +++ b/csharp/src/Rendler/Rendler.xproj @@ -0,0 +1,19 @@ + + + + 14.0 + $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion) + + + + 36a62f5a-1f76-494d-9377-2595ae03c598 + Rendler + .\obj + .\bin\ + v4.6.1 + + + 2.0 + + + \ No newline at end of file diff --git a/csharp/Rendler/RendlerScheduler.cs b/csharp/src/Rendler/RendlerScheduler.cs similarity index 97% rename from csharp/Rendler/RendlerScheduler.cs rename to csharp/src/Rendler/RendlerScheduler.cs index 0f14e0c..36f634f 100644 --- a/csharp/Rendler/RendlerScheduler.cs +++ b/csharp/src/Rendler/RendlerScheduler.cs @@ -1,291 +1,291 @@ -using System; -using System.Collections.Concurrent; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Text; -using System.Threading; -using mesos; -using mesosclr; -using Rendler.Executors.Messages; - -namespace Rendler -{ - internal class RendlerScheduler : IScheduler - { - private const int MaxTasksToRun = 256; // limit for demonstration purpose - - private const double RenderCpus = 1d; - private const double RenderMem = 128d; - private const double CrawlCpus = 0.5d; - private const double CrawlMem = 64d; - - private readonly string _outputDir; - private readonly string _runAsUser; - - - private int _launchedTasks; - private int _finishedTasksCount; - private readonly ConcurrentQueue _crawlQueue = new ConcurrentQueue(); - private readonly ConcurrentQueue _renderQueue = new ConcurrentQueue(); - private readonly ISet _crawled = new HashSet(); - - private readonly ConcurrentDictionary _renderResults 
= new ConcurrentDictionary(); - private readonly ConcurrentDictionary> _crawlResults = new ConcurrentDictionary>(); - - public RendlerScheduler(string startUrl, string outputDir, - string runAsUser = null) - { - if (startUrl == null) throw new ArgumentNullException(nameof(startUrl)); - if (outputDir == null) throw new ArgumentNullException(nameof(outputDir)); - _outputDir = outputDir; - _runAsUser = runAsUser; - - _crawlQueue.Enqueue(startUrl); - _renderQueue.Enqueue(startUrl); - } - - public void Registered(ISchedulerDriver driver, FrameworkID frameworkId, MasterInfo masterInfo) - { - Console.WriteLine($"Registered with Mesos master. FrameworkId='{frameworkId.value}'."); - } - - public void Reregistered(ISchedulerDriver driver, MasterInfo masterInfo) - { - } - - public void ResourceOffers(ISchedulerDriver driver, IEnumerable offers) - { - foreach (var offer in offers) - { - var tasks = new List(); - var resourcesCounter = new ResourcesCounter(offer); - bool done; - do - { - done = true; - - string renderUrl; - if (resourcesCounter.HasRenderTaskResources() && _renderQueue.TryDequeue(out renderUrl)) - { - tasks.Add(GetRenderTaskInfo(offer, ++_launchedTasks, renderUrl)); - resourcesCounter.SubstractRenderResources(); - done = false; - } - - string crawlUrl; - if (resourcesCounter.HasCrawlTaskResources() && _crawlQueue.TryDequeue(out crawlUrl)) - { - tasks.Add(GetCrawlTaskInfo(offer, ++_launchedTasks, crawlUrl)); - resourcesCounter.SubstractCrawlResources(); - _crawled.Add(crawlUrl); - done = false; - } - } while (!done); - - if (tasks.Any ()) { - driver.LaunchTasks (new[] { offer.id }, tasks); - } - else - driver.DeclineOffer(offer.id); - } - } - - public void OfferRescinded(ISchedulerDriver driver, OfferID offerId) - { - } - - public void StatusUpdate(ISchedulerDriver driver, TaskStatus status) - { - if (status.state.IsTerminal()) - { - Console.WriteLine($"Status update: task '{status.task_id.value}' has terminated with state '{status.state}'."); - var 
finishedTasksCount = Interlocked.Increment(ref _finishedTasksCount); - - if (finishedTasksCount == MaxTasksToRun) - { - Console.WriteLine("Reached the max number of tasks to run. Stopping..."); - - var dotWritePath = Path.Combine(_outputDir, "result.dot"); - DotHelper.Write(dotWritePath, _crawlResults, _renderResults); - driver.Stop(); - } - } - else - { - Console.WriteLine($"Status update: task '{status.task_id.value}' is in state '{status.state}'."); - } - } - - public void FrameworkMessage(ISchedulerDriver driver, ExecutorID executorId, SlaveID slaveId, byte[] data) - { - var message = JsonHelper.Deserialize (data); - switch (message.Type) { - case "CrawlResult": - var crawlResult = JsonHelper.Deserialize (message.Body); - Console.WriteLine ($"Framework message : got {crawlResult.Links.Length} links from url '{crawlResult.Url}'."); - - foreach (var link in crawlResult.Links) { - if (_crawled.Contains (link)) - continue; - - _crawlQueue.Enqueue (link); - _renderQueue.Enqueue (link); - } - - // update edges: url -> links - var edges = _crawlResults.GetOrAdd (crawlResult.Url, x => new List ()); - edges.AddRange (crawlResult.Links); - - // empty edge list for links - foreach (var link in crawlResult.Links) - _crawlResults.GetOrAdd (link, x => new List ()); - break; - case "RenderResult": - var renderResult = JsonHelper.Deserialize (message.Body); - Console.WriteLine ($"Framework message : saved '{renderResult.FileName}' for url '{renderResult.Url}'."); - - _renderResults [renderResult.Url] = renderResult.FileName; - break; - default: - Console.WriteLine ($"Unrecognized message type: '{message.Type}'"); - break; - } - } - - public void Disconnected(ISchedulerDriver driver) - { - } - - public void SlaveLost(ISchedulerDriver driver, SlaveID slaveId) - { - } - - public void ExecutorLost(ISchedulerDriver driver, ExecutorID executorId, SlaveID slaveId, int status) - { - } - - public void Error(ISchedulerDriver driver, string message) - { - Console.WriteLine($"Error: 
'{message}'."); - } - - private TaskInfo GetRenderTaskInfo(Offer offer, int uniqueId, string url) - { - var result = new TaskInfo { - name = "Rendler.Render_" + uniqueId, - task_id = new TaskID { value = uniqueId.ToString () }, - slave_id = offer.slave_id, - resources = { - new Resource { - name = "cpus", - type = Value.Type.SCALAR, - scalar = new Value.Scalar { value = RenderCpus } - }, - new Resource { - name = "mem", - type = Value.Type.SCALAR, - scalar = new Value.Scalar { value = RenderMem } - } - }, - executor = new ExecutorInfo { - executor_id = new ExecutorID { value = "RenderExecutor" }, - command = new CommandInfo { - value = "mono rendler.exe -executor=render", - user = _runAsUser - }, - data = Encoding.UTF8.GetBytes (_outputDir) - }, - data = Encoding.UTF8.GetBytes (url) - }; - - result.executor.command.uris.Add (new CommandInfo.URI { - cache = false, - extract = true, - value = "./rendler.tar.gz", // relative to "frameworks_home" mesos-slave command argument - executable = false - }); - - return result; - } - - private TaskInfo GetCrawlTaskInfo(Offer offer, int uniqueId, string url) - { - var result = new TaskInfo { - name = "Rendler.Crawl_" + uniqueId, - task_id = new TaskID { value = uniqueId.ToString () }, - slave_id = offer.slave_id, - resources = { - new Resource { - name = "cpus", - type = Value.Type.SCALAR, - scalar = new Value.Scalar { value = CrawlCpus } - }, - new Resource { name = "mem", type = Value.Type.SCALAR, scalar = new Value.Scalar { value = CrawlMem } } - }, - executor = new ExecutorInfo { - executor_id = new ExecutorID { value = "CrawlExecutor" }, - command = new CommandInfo { - value = "mono rendler.exe -executor=crawl", - user = _runAsUser - }, - }, - data = Encoding.UTF8.GetBytes (url) - }; - - result.executor.command.uris.Add (new CommandInfo.URI { - cache = false, - extract = true, - value = "./rendler.tar.gz", // relative to "frameworks_home" mesos-slave command argument - executable = false - }); - - return result; - } - - 
private class ResourcesCounter - { - private double _cpus; - private double _mem; - - public ResourcesCounter(Offer offer) - { - var cpusResource = offer.resources.SingleOrDefault(x => x.name == "cpus"); - var memResource = offer.resources.SingleOrDefault(x => x.name == "mem"); - _cpus = cpusResource?.scalar.value ?? 0d; - _mem = memResource?.scalar.value ?? 0d; - } - - private void Substract(double cpus, double mem) - { - _cpus = _cpus - cpus; - _mem = _mem - mem; - } - - public bool HasRenderTaskResources() - { - return HasResources(RenderCpus, RenderMem); - } - - public bool HasCrawlTaskResources() - { - return HasResources(CrawlCpus, CrawlMem); - } - - public void SubstractRenderResources() - { - Substract(RenderCpus, RenderMem); - } - - public void SubstractCrawlResources() - { - Substract(CrawlCpus, CrawlMem); - } - - private bool HasResources(double cpus, double mem) - { - return _cpus >= cpus && _mem >= mem; - } - } - } -} +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading; +using mesos; +using mesosclr; +using Rendler.Executors.Messages; + +namespace Rendler +{ + internal class RendlerScheduler : IScheduler + { + private const int MaxTasksToRun = 256; // limit for demonstration purpose + + private const double RenderCpus = 1d; + private const double RenderMem = 128d; + private const double CrawlCpus = 0.5d; + private const double CrawlMem = 64d; + + private readonly string _outputDir; + private readonly string _runAsUser; + + + private int _launchedTasks; + private int _finishedTasksCount; + private readonly ConcurrentQueue _crawlQueue = new ConcurrentQueue(); + private readonly ConcurrentQueue _renderQueue = new ConcurrentQueue(); + private readonly ISet _crawled = new HashSet(); + + private readonly ConcurrentDictionary _renderResults = new ConcurrentDictionary(); + private readonly ConcurrentDictionary> _crawlResults = new 
ConcurrentDictionary>(); + + public RendlerScheduler(string startUrl, string outputDir, + string runAsUser = null) + { + if (startUrl == null) throw new ArgumentNullException(nameof(startUrl)); + if (outputDir == null) throw new ArgumentNullException(nameof(outputDir)); + _outputDir = outputDir; + _runAsUser = runAsUser; + + _crawlQueue.Enqueue(startUrl); + _renderQueue.Enqueue(startUrl); + } + + public void Registered(ISchedulerDriver driver, FrameworkID frameworkId, MasterInfo masterInfo) + { + Console.WriteLine($"Registered with Mesos master. FrameworkId='{frameworkId.value}'."); + } + + public void Reregistered(ISchedulerDriver driver, MasterInfo masterInfo) + { + } + + public void ResourceOffers(ISchedulerDriver driver, IEnumerable offers) + { + foreach (var offer in offers) + { + var tasks = new List(); + var resourcesCounter = new ResourcesCounter(offer); + bool done; + do + { + done = true; + + string renderUrl; + if (resourcesCounter.HasRenderTaskResources() && _renderQueue.TryDequeue(out renderUrl)) + { + tasks.Add(GetRenderTaskInfo(offer, ++_launchedTasks, renderUrl)); + resourcesCounter.SubstractRenderResources(); + done = false; + } + + string crawlUrl; + if (resourcesCounter.HasCrawlTaskResources() && _crawlQueue.TryDequeue(out crawlUrl)) + { + tasks.Add(GetCrawlTaskInfo(offer, ++_launchedTasks, crawlUrl)); + resourcesCounter.SubstractCrawlResources(); + _crawled.Add(crawlUrl); + done = false; + } + } while (!done); + + if (tasks.Any ()) { + driver.LaunchTasks (new[] { offer.id }, tasks); + } + else + driver.DeclineOffer(offer.id); + } + } + + public void OfferRescinded(ISchedulerDriver driver, OfferID offerId) + { + } + + public void StatusUpdate(ISchedulerDriver driver, TaskStatus status) + { + if (status.state.IsTerminal()) + { + Console.WriteLine($"Status update: task '{status.task_id.value}' has terminated with state '{status.state}'."); + var finishedTasksCount = Interlocked.Increment(ref _finishedTasksCount); + + if (finishedTasksCount == 
MaxTasksToRun) + { + Console.WriteLine("Reached the max number of tasks to run. Stopping..."); + + var dotWritePath = Path.Combine(_outputDir, "result.dot"); + DotHelper.Write(dotWritePath, _crawlResults, _renderResults); + driver.Stop(); + } + } + else + { + Console.WriteLine($"Status update: task '{status.task_id.value}' is in state '{status.state}'."); + } + } + + public void FrameworkMessage(ISchedulerDriver driver, ExecutorID executorId, SlaveID slaveId, byte[] data) + { + var message = JsonHelper.Deserialize (data); + switch (message.Type) { + case "CrawlResult": + var crawlResult = JsonHelper.Deserialize (message.Body); + Console.WriteLine ($"Framework message : got {crawlResult.Links.Length} links from url '{crawlResult.Url}'."); + + foreach (var link in crawlResult.Links) { + if (_crawled.Contains (link)) + continue; + + _crawlQueue.Enqueue (link); + _renderQueue.Enqueue (link); + } + + // update edges: url -> links + var edges = _crawlResults.GetOrAdd (crawlResult.Url, x => new List ()); + edges.AddRange (crawlResult.Links); + + // empty edge list for links + foreach (var link in crawlResult.Links) + _crawlResults.GetOrAdd (link, x => new List ()); + break; + case "RenderResult": + var renderResult = JsonHelper.Deserialize (message.Body); + Console.WriteLine ($"Framework message : saved '{renderResult.FileName}' for url '{renderResult.Url}'."); + + _renderResults [renderResult.Url] = renderResult.FileName; + break; + default: + Console.WriteLine ($"Unrecognized message type: '{message.Type}'"); + break; + } + } + + public void Disconnected(ISchedulerDriver driver) + { + } + + public void SlaveLost(ISchedulerDriver driver, SlaveID slaveId) + { + } + + public void ExecutorLost(ISchedulerDriver driver, ExecutorID executorId, SlaveID slaveId, int status) + { + } + + public void Error(ISchedulerDriver driver, string message) + { + Console.WriteLine($"Error: '{message}'."); + } + + private TaskInfo GetRenderTaskInfo(Offer offer, int uniqueId, string url) + { + 
var result = new TaskInfo { + name = "Rendler.Render_" + uniqueId, + task_id = new TaskID { value = uniqueId.ToString () }, + slave_id = offer.slave_id, + resources = { + new Resource { + name = "cpus", + type = Value.Type.SCALAR, + scalar = new Value.Scalar { value = RenderCpus } + }, + new Resource { + name = "mem", + type = Value.Type.SCALAR, + scalar = new Value.Scalar { value = RenderMem } + } + }, + executor = new ExecutorInfo { + executor_id = new ExecutorID { value = "RenderExecutor" }, + command = new CommandInfo { + value = "mono rendler.exe -executor=render", + user = _runAsUser + }, + data = Encoding.UTF8.GetBytes (_outputDir) + }, + data = Encoding.UTF8.GetBytes (url) + }; + + result.executor.command.uris.Add (new CommandInfo.URI { + cache = false, + extract = true, + value = "./rendler.tar.gz", // relative to "frameworks_home" mesos-slave command argument + executable = false + }); + + return result; + } + + private TaskInfo GetCrawlTaskInfo(Offer offer, int uniqueId, string url) + { + var result = new TaskInfo { + name = "Rendler.Crawl_" + uniqueId, + task_id = new TaskID { value = uniqueId.ToString () }, + slave_id = offer.slave_id, + resources = { + new Resource { + name = "cpus", + type = Value.Type.SCALAR, + scalar = new Value.Scalar { value = CrawlCpus } + }, + new Resource { name = "mem", type = Value.Type.SCALAR, scalar = new Value.Scalar { value = CrawlMem } } + }, + executor = new ExecutorInfo { + executor_id = new ExecutorID { value = "CrawlExecutor" }, + command = new CommandInfo { + value = "mono rendler.exe -executor=crawl", + user = _runAsUser + }, + }, + data = Encoding.UTF8.GetBytes (url) + }; + + result.executor.command.uris.Add (new CommandInfo.URI { + cache = false, + extract = true, + value = "./rendler.tar.gz", // relative to "frameworks_home" mesos-slave command argument + executable = false + }); + + return result; + } + + private class ResourcesCounter + { + private double _cpus; + private double _mem; + + public 
ResourcesCounter(Offer offer) + { + var cpusResource = offer.resources.SingleOrDefault(x => x.name == "cpus"); + var memResource = offer.resources.SingleOrDefault(x => x.name == "mem"); + _cpus = cpusResource?.scalar.value ?? 0d; + _mem = memResource?.scalar.value ?? 0d; + } + + private void Substract(double cpus, double mem) + { + _cpus = _cpus - cpus; + _mem = _mem - mem; + } + + public bool HasRenderTaskResources() + { + return HasResources(RenderCpus, RenderMem); + } + + public bool HasCrawlTaskResources() + { + return HasResources(CrawlCpus, CrawlMem); + } + + public void SubstractRenderResources() + { + Substract(RenderCpus, RenderMem); + } + + public void SubstractCrawlResources() + { + Substract(CrawlCpus, CrawlMem); + } + + private bool HasResources(double cpus, double mem) + { + return _cpus >= cpus && _mem >= mem; + } + } + } +} diff --git a/csharp/Rendler/RunMode.cs b/csharp/src/Rendler/RunMode.cs similarity index 92% rename from csharp/Rendler/RunMode.cs rename to csharp/src/Rendler/RunMode.cs index ae89e3d..d7fdc2c 100644 --- a/csharp/Rendler/RunMode.cs +++ b/csharp/src/Rendler/RunMode.cs @@ -1,9 +1,9 @@ -namespace Rendler -{ - internal enum RunMode - { - Default, - Scheduler, - Executor - } -} +namespace Rendler +{ + internal enum RunMode + { + Default, + Scheduler, + Executor + } +} diff --git a/csharp/src/Rendler/project.json b/csharp/src/Rendler/project.json new file mode 100644 index 0000000..381c288 --- /dev/null +++ b/csharp/src/Rendler/project.json @@ -0,0 +1,23 @@ +{ + "version": "1.0.0-*", + "buildOptions": { + "emitEntryPoint": true, + "copyToOutput": "libmesosclr.so" + }, + + "dependencies": { + "mesosclr": "1.0.0", + "Microsoft.NETCore.App": { + "type": "platform", + "version": "1.0.0-rc2-3002702" + }, + "Newtonsoft.Json": "8.0.3", + "System.Net.Http": "4.0.1-rc2-24027" + }, + + "frameworks": { + "netcoreapp1.0": { + "imports": "dnxcore50" + } + } +}