Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions config/chassis_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import subprocess
import utilities_common.cli as clicommon
from utilities_common.chassis import is_smartswitch, get_all_dpus
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone

TIMEOUT_SECS = 10
TRANSITION_TIMEOUT = timedelta(seconds=240) # 4 minutes
Expand All @@ -27,6 +27,12 @@ def set_entry(self, table, key, entry):
for field, value in entry.items():
self.db.set("STATE_DB", redis_key, field, value)

def delete_field(self, table, key, field):
"""Delete a specific field from table|key."""
redis_key = f"{table}|{key}"
client = self.db.get_redis_client("STATE_DB")
return client.hdel(redis_key, field)
Copy link
Contributor

@gpunathilell gpunathilell Oct 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gpunathilell Thank you for the suggestion about using the database connector abstraction consistently. While I agree this would improve consistency, this logic is currently undergoing broader refactoring in PRs #4031 and related PRs across sonic-platform-common, sonic-host-services, and sonic-platform-daemons. To avoid conflicts and duplicated effort, I believe the database access pattern consistency should be addressed as part of that broader refactoring work rather than making incremental changes here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since refactoring will be done, please do not resolve the review comments, I will approve as this is blocking bug fix. Thank you


#
# 'chassis_modules' group ('config chassis_modules ...')
#
Expand Down Expand Up @@ -72,8 +78,9 @@ def set_state_transition_in_progress(db, chassis_module_name, value):
entry = state_db.get_entry('CHASSIS_MODULE_TABLE', chassis_module_name) or {}
entry['state_transition_in_progress'] = value
if value == 'True':
entry['transition_start_time'] = datetime.utcnow().isoformat()
entry['transition_start_time'] = datetime.now(timezone.utc).isoformat()
else:
# Remove transition_start_time from both local entry and database
entry.pop('transition_start_time', None)
state_db.delete_field('CHASSIS_MODULE_TABLE', chassis_module_name, 'transition_start_time')
state_db.set_entry('CHASSIS_MODULE_TABLE', chassis_module_name, entry)
Expand All @@ -92,7 +99,14 @@ def is_transition_timed_out(db, chassis_module_name):
start_time = datetime.fromisoformat(start_time_str)
except ValueError:
return False
return datetime.utcnow() - start_time > TRANSITION_TIMEOUT

# Use UTC everywhere for consistent comparison
current_time = datetime.now(timezone.utc)
if start_time.tzinfo is None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rameshraghupathy do we need to handle a case when tzinfo is none? as it is always updated by chassisd and module_base. why is this if condition needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gpunathilell It is a T1 switch and there is nothing wrong in doing the additional check to be resilient.

# If stored time is naive, assume it's UTC
start_time = start_time.replace(tzinfo=timezone.utc)

return current_time - start_time > TRANSITION_TIMEOUT

#
# Name: check_config_module_state_with_timeout
Expand Down
30 changes: 25 additions & 5 deletions tests/chassis_modules_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import sys
import os
from click.testing import CliRunner
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from config.chassis_modules import (
set_state_transition_in_progress,
is_transition_timed_out,
Expand Down Expand Up @@ -490,7 +490,7 @@ def test_shutdown_triggers_transition_in_progress(self):
fvs = {
'admin_status': 'up',
'state_transition_in_progress': 'True',
'transition_start_time': datetime.utcnow().isoformat()
'transition_start_time': datetime.now(timezone.utc).isoformat()
}
db.cfgdb.set_entry('CHASSIS_MODULE', "DPU0", fvs)

Expand All @@ -517,7 +517,7 @@ def test_shutdown_triggers_transition_timeout(self):
fvs = {
'admin_status': 'up',
'state_transition_in_progress': 'True',
'transition_start_time': (datetime.utcnow() - timedelta(minutes=30)).isoformat()
'transition_start_time': (datetime.now(timezone.utc) - timedelta(minutes=30)).isoformat()
}
db.cfgdb.set_entry('CHASSIS_MODULE', "DPU0", fvs)

Expand Down Expand Up @@ -593,15 +593,35 @@ def test_is_transition_timed_out_all_paths(self):
assert is_transition_timed_out(db, "DPU0") is False

# Case 4: Timed out
old_time = (datetime.utcnow() - TRANSITION_TIMEOUT - timedelta(seconds=1)).isoformat()
old_time = (datetime.now(timezone.utc) - TRANSITION_TIMEOUT - timedelta(seconds=1)).isoformat()
db.statedb.get_entry.return_value = {"transition_start_time": old_time}
assert is_transition_timed_out(db, "DPU0") is True

# Case 5: Not timed out yet
now = datetime.utcnow().isoformat()
now = datetime.now(timezone.utc).isoformat()
db.statedb.get_entry.return_value = {"transition_start_time": now}
assert is_transition_timed_out(db, "DPU0") is False

def test_delete_field(self):
"""Single test to cover missing delete_field and timezone handling lines"""
from config.chassis_modules import StateDBHelper
from datetime import timezone

# Test delete_field method (covers lines 32-34)
mock_sonic_db = mock.MagicMock()
mock_redis_client = mock.MagicMock()
mock_sonic_db.get_redis_client.return_value = mock_redis_client
helper = StateDBHelper(mock_sonic_db)
helper.delete_field('TEST_TABLE', 'test_key', 'test_field')
mock_redis_client.hdel.assert_called_once_with("TEST_TABLE|test_key", "test_field")

# Test timezone-aware datetime handling (covers line 109)
db = mock.MagicMock()
db.statedb = mock.MagicMock()
tz_time = (datetime.now(timezone.utc) - TRANSITION_TIMEOUT - timedelta(seconds=1)).isoformat()
db.statedb.get_entry.return_value = {"transition_start_time": tz_time}
assert is_transition_timed_out(db, "DPU0") is True

@classmethod
def teardown_class(cls):
print("TEARDOWN")
Expand Down
Loading