Skip to content
11 changes: 2 additions & 9 deletions core/federated/RTI/rti_remote.c
Original file line number Diff line number Diff line change
Expand Up @@ -636,23 +636,20 @@ void handle_address_query(uint16_t fed_id) {

int32_t server_port;
uint32_t* ip_address;
char* server_host_name;
uint32_t temp = 0;

LF_MUTEX_LOCK(&rti_mutex);
// Check if the RTI has initialized the remote federate's network abstraction.
if (remote_fed->net == NULL) {
// RTI has not set up the remote federate. Respond with -1 to indicate an unknown port number.
server_port = -1;
uint32_t temp = 0;
ip_address = &temp;
server_host_name = "localhost";
} else {
// The network abstraction is initialized, but the RTI might still not know the port number. This can happen if the
// RTI has not yet received a MSG_TYPE_ADDRESS_ADVERTISEMENT message from the remote federate. In such cases, the
// returned port number might still be -1.
server_port = ((socket_priv_t*)remote_fed->net)->server_port;
ip_address = (uint32_t*)&((socket_priv_t*)remote_fed->net)->server_ip_addr;
server_host_name = ((socket_priv_t*)remote_fed->net)->server_hostname;
}

encode_int32(server_port, (unsigned char*)&buffer[1]);
Expand All @@ -669,8 +666,7 @@ void handle_address_query(uint16_t fed_id) {
tracepoint_rti_to_federate(send_ADR_QR_REP, fed_id, NULL);
}

LF_PRINT_DEBUG("Replied to address query from federate %d with address %s:%d.", fed_id, server_host_name,
server_port);
LF_PRINT_DEBUG("Replied to address query from federate %d", fed_id);
}

void handle_address_ad(uint16_t federate_id) {
Expand Down Expand Up @@ -1057,7 +1053,6 @@ void send_reject(net_abstraction_t net_abs, unsigned char error_code) {
}
// Close the network abstraction without reading until EOF.
shutdown_net(net_abs, false);
net_abs = NULL;
LF_MUTEX_UNLOCK(&rti_mutex);
}

Expand Down Expand Up @@ -1427,7 +1422,6 @@ void lf_connect_to_federates(net_abstraction_t rti_net) {
lf_print_warning("RTI failed to authenticate the incoming federate.");
// Close the network abstraction without reading until EOF.
shutdown_net(fed_net, false);
fed_net = NULL;
// Ignore the federate that failed authentication.
i--;
continue;
Expand Down Expand Up @@ -1496,7 +1490,6 @@ void* respond_to_erroneous_connections(void* nothing) {
}
// Close the network abstraction without reading until EOF.
shutdown_net(fed_net, false);
fed_net = NULL;
}
return NULL;
}
Expand Down
2 changes: 1 addition & 1 deletion core/federated/RTI/rti_remote.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ void handle_address_query(uint16_t fed_id);
* byte. The RTI will keep a record of this number in the .server_port
* field of the _RTI.federates[federate_id] array of structs.
*
* The server_hostname and server_ip_addr fields are assigned
* The server_ip_addr field is assigned
* in lf_connect_to_federates() upon accepting the socket
* from the remote federate.
*
Expand Down
13 changes: 4 additions & 9 deletions core/federated/federate.c
Original file line number Diff line number Diff line change
Expand Up @@ -1753,13 +1753,10 @@ void lf_connect_to_federate(uint16_t remote_federate_id) {
assert(port > 0);
uint16_t uport = (uint16_t)port;

char hostname[INET_ADDRSTRLEN];
inet_ntop(AF_INET, &host_ip_addr, hostname, INET_ADDRSTRLEN);

socket_connection_params_t params;
socket_connection_params_t params = {0};
params.type = TCP;
params.port = uport;
params.server_hostname = hostname;
params.server_ip_addr = &host_ip_addr;
net_abstraction_t net = connect_to_net((net_params_t)&params);
if (net == NULL) {
lf_print_error_and_exit("Failed to connect to federate.");
Expand Down Expand Up @@ -1837,7 +1834,7 @@ void lf_connect_to_rti(const char* hostname, int port) {
hostname = federation_metadata.rti_host ? federation_metadata.rti_host : hostname;
port = federation_metadata.rti_port >= 0 ? federation_metadata.rti_port : port;

socket_connection_params_t params;
socket_connection_params_t params = {0};
params.type = TCP;
params.port = port;
params.server_hostname = hostname;
Expand Down Expand Up @@ -2013,7 +2010,7 @@ void* lf_handle_p2p_connections_from_federates(void* env_arg) {
net_abstraction_t net = accept_net(_fed.server_net);
if (net == NULL) {
lf_print_warning("Federate failed to accept the network abstraction.");
return NULL;
continue;
Copy link

Copilot AI Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If accept_net() returns NULL (e.g., because the server socket was closed and accept() starts failing with EBADF), continue will immediately retry in a tight loop, potentially spamming logs and burning CPU until _lf_termination_executed flips. Consider breaking/returning on NULL (as before), or adding an explicit backoff + exit condition based on errno/termination state.

Suggested change
continue;
break;

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

void* lf_handle_p2p_connections_from_federates(void* env_arg) {
...
  while (received_federates < _fed.number_of_inbound_p2p_connections && !_lf_termination_executed) {
    if (rti_failed()) {
      break;
    }
    // Wait for an incoming connection request.
    net_abstraction_t net = accept_net(_fed.server_net);
    if (net == NULL) {
      lf_print_warning("Federate failed to accept the network abstraction.");
      continue;
    }
...
  }

  LF_PRINT_LOG("All %zu remote federates are connected.", _fed.number_of_inbound_p2p_connections);
  return NULL;
}

I think it should still be continue. However, as the comment above says, it will then keep looping when there are spamming connect() attempts.

How about adding a short sleep like 100 ms? @edwardalee

Besides that, I think the rti_failed() case should return NULL; instead of break, because otherwise it falls through and prints the log message saying that all remote federates are connected.

}
LF_PRINT_LOG("Accepted new connection from remote federate.");

Expand All @@ -2034,7 +2031,6 @@ void* lf_handle_p2p_connections_from_federates(void* env_arg) {
write_to_net(net, 2, response);
}
shutdown_net(net, false);
net = NULL;
continue;
}

Expand All @@ -2055,7 +2051,6 @@ void* lf_handle_p2p_connections_from_federates(void* env_arg) {
write_to_net(net, 2, response);
}
shutdown_net(net, false);
net = NULL;
continue;
}

Expand Down
10 changes: 6 additions & 4 deletions network/api/socket_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ typedef struct socket_connection_params_t {

/** @brief Hostname of the remote server. */
const char* server_hostname;

/** @brief IP address of the remote server. If provided, bypasses DNS resolution. */
struct in_addr* server_ip_addr;
} socket_connection_params_t;
Comment on lines 134 to 139
Copy link

Copilot AI Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

server_ip_addr is treated as an input-only value (and connect_to_socket() does not modify it). Declaring this as const struct in_addr* (in both socket_connection_params_t and connect_to_socket() signature) would better communicate intent and allow callers to pass pointers to const data without casts.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.


/**
Expand All @@ -149,8 +152,6 @@ typedef struct socket_priv_t {
uint16_t port;
/** @brief The desired port specified by the user on the command line. */
uint16_t user_specified_port;
/** @brief Human-readable IP address of the federate's socket server. */
char server_hostname[INET_ADDRSTRLEN];
/** @brief Port number of the socket server of the federate. The port number will be -1 if there is no server or if
* the RTI has not been informed of the port number. */
int32_t server_port;
Expand Down Expand Up @@ -215,10 +216,11 @@ int accept_socket(int socket);
*
* @param sock The socket file descriptor that has already been created (using `socket()`).
* @param hostname The hostname or IP address of the server to connect to.
* @param ip_addr The IPv4 address to connect to. If non-NULL, bypasses DNS lookup of hostname.
* @param port The port number to connect to. If 0 is specified, a default port range will be used.
* @return 0 on success, -1 on failure, and `errno` is set to indicate the specific error.
*/
int connect_to_socket(int sock, const char* hostname, int port);
int connect_to_socket(int sock, const char* hostname, struct in_addr* ip_addr, int port);

/**
* @brief Read the specified number of bytes from the specified socket into the specified buffer.
Expand Down Expand Up @@ -296,7 +298,7 @@ bool is_socket_open(int socket);
* @ingroup Network
*
* Get the connected peer name from the connected socket.
* Set it to the server_ip_addr. Also, set server_hostname if LOG_LEVEL is higher than LOG_LEVEL_DEBUG.
* Set it to the server_ip_addr. Also, print the peer's IP address in human-readable form if LOG_LEVEL is at least LOG_LEVEL_DEBUG.
*
* @param priv The socket_priv struct.
* @return 0 for success, -1 for failure.
Expand Down
5 changes: 2 additions & 3 deletions network/impl/src/lf_socket_support.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ net_abstraction_t initialize_net() {
priv->socket_descriptor = -1;

// Federate initialization
strncpy(priv->server_hostname, "localhost", INET_ADDRSTRLEN);
priv->server_ip_addr.s_addr = 0;
priv->server_port = -1;

Expand Down Expand Up @@ -83,11 +82,11 @@ net_abstraction_t connect_to_net(net_params_t params) {
socket_priv_t* priv = (socket_priv_t*)net;
socket_connection_params_t* sock_params = (socket_connection_params_t*)params;
priv->server_port = sock_params->port;
memcpy(priv->server_hostname, sock_params->server_hostname, INET_ADDRSTRLEN);
// Create the client network abstraction.
create_client(net);
// Connect to the target server.
if (connect_to_socket(priv->socket_descriptor, priv->server_hostname, priv->server_port) != 0) {
if (connect_to_socket(priv->socket_descriptor, sock_params->server_hostname, sock_params->server_ip_addr,
priv->server_port) != 0) {
lf_print_error("Failed to connect to socket.");
return NULL;
}
Expand Down
75 changes: 46 additions & 29 deletions network/impl/src/socket_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,10 @@ int get_peer_address(socket_priv_t* priv) {
priv->server_ip_addr = peer_addr.sin_addr;

#if LOG_LEVEL >= LOG_LEVEL_DEBUG
// Create the human readable format and copy that into
// the .server_hostname field of the federate.
// Create the human readable format for logging purposes
char str[INET_ADDRSTRLEN + 1];
inet_ntop(AF_INET, &priv->server_ip_addr, str, INET_ADDRSTRLEN);
strncpy(priv->server_hostname, str, INET_ADDRSTRLEN - 1); // Copy up to INET_ADDRSTRLEN - 1 characters
priv->server_hostname[INET_ADDRSTRLEN - 1] = '\0'; // Null-terminate explicitly

LF_PRINT_DEBUG("Got address %s", priv->server_hostname);
LF_PRINT_DEBUG("Got address %s", str);
#endif
return 0;
}
Expand Down Expand Up @@ -218,20 +214,28 @@ int accept_socket(int socket) {
return socket_id;
}

int connect_to_socket(int sock, const char* hostname, int port) {
int connect_to_socket(int sock, const char* hostname, struct in_addr* ip_addr, int port) {
struct addrinfo hints;
struct addrinfo* result;
struct addrinfo* result = NULL;
int ret = -1;

memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_INET; /* Allow IPv4 */
hints.ai_socktype = SOCK_STREAM; /* Stream socket */
hints.ai_protocol = IPPROTO_TCP; /* TCP protocol */
hints.ai_addr = NULL;
hints.ai_next = NULL;
hints.ai_flags = AI_NUMERICSERV; /* Allow only numeric port numbers */

uint16_t used_port = (port == 0) ? DEFAULT_PORT : (uint16_t)port;
struct sockaddr_in direct_addr;

if (ip_addr == NULL) {
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_INET; /* Allow IPv4 */
hints.ai_socktype = SOCK_STREAM; /* Stream socket */
hints.ai_protocol = IPPROTO_TCP; /* TCP protocol */
hints.ai_addr = NULL;
hints.ai_next = NULL;
hints.ai_flags = AI_NUMERICSERV; /* Allow only numeric port numbers */
} else {
memset(&direct_addr, 0, sizeof(direct_addr));
direct_addr.sin_family = AF_INET;
direct_addr.sin_port = htons(used_port);
direct_addr.sin_addr = *ip_addr;
}

instant_t start_connect = lf_time_physical();
// while (!_lf_termination_executed) { // Not working...
Expand All @@ -240,30 +244,43 @@ int connect_to_socket(int sock, const char* hostname, int port) {
lf_print_error("Failed to connect with timeout: " PRINTF_TIME ". Giving up.", CONNECT_TIMEOUT);
break;
}
// Convert port number to string.
char str[6];
snprintf(str, sizeof(str), "%u", used_port);

// Get address structure matching hostname and hints criteria, and
// set port to the port number provided in str. There should only
// ever be one matching address structure, and we connect to that.
if (getaddrinfo(hostname, (const char*)&str, &hints, &result)) {
lf_print_error("No host matching given hostname: %s", hostname);
break;
if (ip_addr != NULL) {
// Safe to type cast specific protocols (e.g., sockaddr_in) to the generic sockaddr.
ret = connect(sock, (struct sockaddr*)&direct_addr, sizeof(direct_addr));
} else {
// Convert port number to string.
char str[6];
snprintf(str, sizeof(str), "%u", used_port);

// Get address structure matching hostname and hints criteria, and
// set port to the port number provided in str. There should only
// ever be one matching address structure, and we connect to that.
if (getaddrinfo(hostname, (const char*)&str, &hints, &result)) {
Copy link

Copilot AI Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the getaddrinfo() call, (const char*)&str is a confusing cast of an array address to char* (and can hide type issues). Passing str directly is clearer and avoids relying on an unnecessary cast.

Suggested change
if (getaddrinfo(hostname, (const char*)&str, &hints, &result)) {
if (getaddrinfo(hostname, str, &hints, &result)) {

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

lf_print_error("No host matching given hostname: %s", hostname);
break;
}
ret = connect(sock, result->ai_addr, result->ai_addrlen);
freeaddrinfo(result);
}
ret = connect(sock, result->ai_addr, result->ai_addrlen);

if (ret < 0) {
lf_sleep(CONNECT_RETRY_INTERVAL);
lf_print_warning("Could not connect. Will try again every " PRINTF_TIME " nanoseconds. Connecting to port %d.\n",
CONNECT_RETRY_INTERVAL, used_port);
freeaddrinfo(result);
continue;
} else {
break;
}
}
freeaddrinfo(result);
lf_print_info("Connected to %s:%d.", hostname, used_port);

if (ip_addr != NULL) {
char host_str[INET_ADDRSTRLEN];
inet_ntop(AF_INET, ip_addr, host_str, INET_ADDRSTRLEN);
lf_print_info("Connected to %s:%d.", host_str, used_port);
} else {
lf_print_info("Connected to %s:%d.", hostname, used_port);
Comment on lines +277 to +282
Copy link

Copilot AI Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

connect_to_socket() logs "Connected to ..." unconditionally, even when ret remains < 0 due to timeout or repeated failures. This can mislead operators and test logs. Consider only printing the success message when ret == 0, and print a failure/timeout message (or nothing) otherwise; also handle inet_ntop() failure before using host_str.

Suggested change
if (ip_addr != NULL) {
char host_str[INET_ADDRSTRLEN];
inet_ntop(AF_INET, ip_addr, host_str, INET_ADDRSTRLEN);
lf_print_info("Connected to %s:%d.", host_str, used_port);
} else {
lf_print_info("Connected to %s:%d.", hostname, used_port);
if (ret == 0) {
if (ip_addr != NULL) {
char host_str[INET_ADDRSTRLEN];
if (inet_ntop(AF_INET, ip_addr, host_str, INET_ADDRSTRLEN) != NULL) {
lf_print_info("Connected to %s:%d.", host_str, used_port);
} else {
lf_print_info("Connected to port %d.", used_port);
}
} else {
lf_print_info("Connected to %s:%d.", hostname, used_port);
}

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need this? On line 267, which is not shown here,

    if (ret < 0) {
      lf_sleep(CONNECT_RETRY_INTERVAL);
      lf_print_warning("Could not connect. Will try again every " PRINTF_TIME " nanoseconds. Connecting to port %d.\n",
                       CONNECT_RETRY_INTERVAL, used_port);
      continue;
    } else {
      break;
    }
  }

If ret<0, it continues, so it wouldn't get to this part if ret<0.

}
return ret;
}

Expand Down
Loading