From 2e7bb99b385152920929a99cc6f39bd384ad73bd Mon Sep 17 00:00:00 2001 From: Prahladha Seerapu Date: Thu, 6 Jul 2017 12:18:31 -0500 Subject: [PATCH] when icecc is from a chroot with a ICECC_VERSION, builds on local machine fail. #128 (#262) * when icecc is from a chroot with a ICECC_VERSION, builds on local machine fail.#128 * Revert "Allow specyfing an alternate remote name in iceccd (#157)" This reverts commit 6e9061c3afc459abb19d08625ce99875414165a5. --- README.md | 7 ------- daemon/main.cpp | 46 +++++++++++++++++++++++++++-------------- doc/man-iceccd.1.xml | 14 ------------- scheduler/scheduler.cpp | 25 +++++++++++++++------- services/comm.cpp | 18 ++++++++++++++++ services/comm.h | 25 +++++++++++++++++++--- 6 files changed, 89 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 192f66874..610749104 100644 --- a/README.md +++ b/README.md @@ -539,13 +539,6 @@ problems configuring broadcast. So you might need the -s option for the daemon in any case there. If the monitor can't find the scheduler, use USE\_SCHEDULER=\ icemon (or send me a patch :) -Note: If there is NAT between a daemon and the scheduler, you may experience -failures when occasionally jobs bounce back to be compiled on the local daemon -that issued the compile. You may try the --extra-name option on these daemons -if you know ahead of time what is the external IP as seen by the scheduler, -for example in a virtualized setup with 1-1 NAT floating IPs that a startup -script can obtain, but that are not visible in normal networking tools. - I use distcc, why should I change? ------------------------------------------------------------------------------------------------------------------- diff --git a/daemon/main.cpp b/daemon/main.cpp index f87a799d2..b18d3bf97 100644 --- a/daemon/main.cpp +++ b/daemon/main.cpp @@ -455,7 +455,6 @@ struct Daemon { map fd2chan; int new_client_id; string remote_name; - string extra_remote_name; time_t next_scheduler_connect; unsigned long icecream_load; struct timeval icecream_usage; @@ -529,6 +528,7 @@ struct Daemon { int scheduler_get_internals() __attribute_warn_unused_result__; void clear_children(); int scheduler_use_cs(UseCSMsg *msg) __attribute_warn_unused_result__; + int scheduler_no_cs(NoCSMsg *msg) __attribute_warn_unused_result__; bool handle_get_cs(Client *client, Msg *msg) __attribute_warn_unused_result__; bool handle_local_job(Client *client, Msg *msg) __attribute_warn_unused_result__; bool handle_job_done(Client *cl, JobDoneMsg *m) __attribute_warn_unused_result__; @@ -846,7 +846,7 @@ string Daemon::dump_internals() const string result; result += "Node Name: " + nodename + "\n"; - result += " Remote name: " + remote_name + " [" + extra_remote_name + "]\n"; + result += " Remote name: " + remote_name + "\n"; for (map::const_iterator it = fd2chan.begin(); it != fd2chan.end(); ++it) { result += " fd2chan[" + toString(it->first) + "] = " + it->second->dump() + "\n"; @@ -920,7 +920,7 @@ int Daemon::scheduler_use_cs(UseCSMsg *msg) return 1; } - if ((msg->hostname == remote_name || msg->hostname == extra_remote_name) && int(msg->port) == daemon_port) { + if (msg->hostname == remote_name && int(msg->port) == daemon_port) { c->usecsmsg = new UseCSMsg(msg->host_platform, "127.0.0.1", daemon_port, msg->job_id, true, 1, msg->matched_job_id); c->status = Client::PENDING_USE_CS; @@ -941,6 +941,29 @@ int Daemon::scheduler_use_cs(UseCSMsg *msg) return 0; } +int Daemon::scheduler_no_cs(NoCSMsg *msg) +{ + Client *c = clients.find_by_client_id(msg->client_id); + trace() << "handle_use_cs " << msg->job_id << " " << msg->client_id + << " " << c << " " << endl; + + if (!c) { + if (send_scheduler(JobDoneMsg(msg->job_id, 107, JobDoneMsg::FROM_SUBMITTER))) { + return 1; + } + + return 1; + } + + c->usecsmsg = new UseCSMsg(string(), "127.0.0.1", daemon_port, msg->job_id, true, 1, 0); + c->status = Client::PENDING_USE_CS; + + c->job_id = msg->job_id; + + return 0; + +} + bool Daemon::handle_transfer_env(Client *client, Msg *_msg) { log_error() << "handle_transfer_env" << endl; @@ -1815,6 +1838,9 @@ int Daemon::answer_client_requests() case M_USE_CS: ret = scheduler_use_cs(static_cast(msg)); break; + case M_NO_CS: + ret = scheduler_no_cs(static_cast(msg)); + break; case M_GET_INTERNALS: ret = scheduler_get_internals(); break; @@ -1981,8 +2007,7 @@ bool Daemon::reconnect() remote_name = string(); } - log_info() << "Connected to scheduler (I am known as " << remote_name - << " [" << extra_remote_name << "]" << ")" << endl; + log_info() << "Connected to scheduler (I am known as " << remote_name << ")" << endl; current_load = -1000; gettimeofday(&last_stat, 0); icecream_load = 0; @@ -2027,11 +2052,6 @@ int main(int argc, char **argv) bool detach = false; nice_level = 5; // defined in serve.h - const char* extra_name = getenv("ICECC_EXTRA_NAME"); - if (extra_name && *extra_name) { - d.extra_remote_name = extra_name; - } - while (true) { int option_index = 0; static const struct option long_options[] = { @@ -2048,7 +2068,6 @@ int main(int argc, char **argv) { "cache-limit", 1, NULL, 0}, { "no-remote", 0, NULL, 0}, { "port", 1, NULL, 'p'}, - { "extra-name", 1, NULL, 0}, { 0, 0, 0, 0 } }; @@ -2092,11 +2111,8 @@ int main(int argc, char **argv) } } else if (optname == "no-remote") { d.noremote = true; - } else if (optname == "extra-name") { - if (optarg && *optarg) { - d.extra_remote_name = optarg; - } } + } break; case 'd': diff --git a/doc/man-iceccd.1.xml b/doc/man-iceccd.1.xml index 4eabe693d..d9b3e17af 100644 --- a/doc/man-iceccd.1.xml +++ b/doc/man-iceccd.1.xml @@ -35,7 +35,6 @@ -b env-basedir --cache-limit MB -d ---extra-name name -l log-file -m max-processes -N hostname @@ -91,19 +90,6 @@ environments of compile clients. Detach daemon from shell. - - name - -Use an additional name (IP address) when considering which jobs coming -from the scheduler are in fact local jobs that bounced back. This can be useful -if the scheduler sees the daemon's IP as something the daemon was unable to -discover, due to NAT between the daemon and the scheduler for example. - -This option can be passed by the environment variable ICECC_EXTRA_NAME -as well; the command line option overrides the environment variable. - - - , Print help message and exit. diff --git a/scheduler/scheduler.cpp b/scheduler/scheduler.cpp index e81c2e5c0..c3cec1d9c 100755 --- a/scheduler/scheduler.cpp +++ b/scheduler/scheduler.cpp @@ -877,16 +877,27 @@ static bool empty_queue() break; } } - - UseCSMsg m2(host_platform, cs->name, cs->remotePort(), job->id(), + if(IS_PROTOCOL_37(job->submitter()) && cs == job->submitter()) + { + NoCSMsg m2(job->id(), job->localClientId()); + if (!job->submitter()->send_msg(m2)) { + trace() << "failed to deliver job " << job->id() << endl; + handle_end(job->submitter(), 0); // will care for the rest + return true; + } + } + else + { + UseCSMsg m2(host_platform, cs->name, cs->remotePort(), job->id(), gotit, job->localClientId(), matched_job_id); - - if (!job->submitter()->send_msg(m2)) { - trace() << "failed to deliver job " << job->id() << endl; - handle_end(job->submitter(), 0); // will care for the rest - return true; + if (!job->submitter()->send_msg(m2)) { + trace() << "failed to deliver job " << job->id() << endl; + handle_end(job->submitter(), 0); // will care for the rest + return true; + } } + #if DEBUG_SCHEDULER >= 0 if (!gotit) { trace() << "put " << job->id() << " in joblist of " << cs->nodeName() << " (will install now)" << endl; diff --git a/services/comm.cpp b/services/comm.cpp index 8045e2c41..9cb796b06 100644 --- a/services/comm.cpp +++ b/services/comm.cpp @@ -1031,6 +1031,9 @@ Msg *MsgChannel::get_msg(int timeout) case M_USE_CS: m = new UseCSMsg; break; + case M_NO_CS: + m = new NoCSMsg; + break; case M_COMPILE_FILE: m = new CompileFileMsg(new CompileJob, true); break; @@ -1602,6 +1605,21 @@ void UseCSMsg::send_to_channel(MsgChannel *c) const } } +void NoCSMsg::fill_from_channel(MsgChannel *c) +{ + Msg::fill_from_channel(c); + *c >> job_id; + *c >> client_id; +} + +void NoCSMsg::send_to_channel(MsgChannel *c) const +{ + Msg::send_to_channel(c); + *c << job_id; + *c << client_id; +} + + void CompileFileMsg::fill_from_channel(MsgChannel *c) { Msg::fill_from_channel(c); diff --git a/services/comm.h b/services/comm.h index fca8377a3..9518e4526 100644 --- a/services/comm.h +++ b/services/comm.h @@ -36,7 +36,7 @@ #include "job.h" // if you increase the PROTOCOL_VERSION, add a macro below and use that -#define PROTOCOL_VERSION 36 +#define PROTOCOL_VERSION 37 // if you increase the MIN_PROTOCOL_VERSION, comment out macros below and clean up the code #define MIN_PROTOCOL_VERSION 21 @@ -61,6 +61,7 @@ #define IS_PROTOCOL_34(c) ((c)->protocol >= 34) #define IS_PROTOCOL_35(c) ((c)->protocol >= 35) #define IS_PROTOCOL_36(c) ((c)->protocol >= 36) +#define IS_PROTOCOL_37(c) ((c)->protocol >= 37) enum MsgType { // so far unknown @@ -85,7 +86,6 @@ enum MsgType { M_GET_CS, // S --> C M_USE_CS, // = 'G' - // C --> CS M_COMPILE_FILE, // = 'I' // generic file transfer @@ -127,7 +127,9 @@ enum MsgType { M_VERIFY_ENV, M_VERIFY_ENV_RESULT, // C --> CS, CS --> S (forwarded from C), to not use given host for given environment - M_BLACKLIST_HOST_ENV + M_BLACKLIST_HOST_ENV, + // S --> CS + M_NO_CS }; class MsgChannel; @@ -417,6 +419,23 @@ class UseCSMsg : public Msg uint32_t matched_job_id; }; +class NoCSMsg : public Msg +{ +public: + NoCSMsg() + : Msg(M_NO_CS) {} + NoCSMsg(unsigned int id, unsigned int _client_id) + : Msg(M_NO_CS), + job_id(id), + client_id(_client_id) {} + + virtual void fill_from_channel(MsgChannel *c); + virtual void send_to_channel(MsgChannel *c) const; + + uint32_t job_id; + uint32_t client_id; +}; + class GetNativeEnvMsg : public Msg { public: