Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased
#### Added
- Added memory analysis utility for the trace internal representation.
- Added trace format validator.
- Added multiple trace filter classes and demos.
- Added enhanced trace call stack graph implementation.
Expand Down
40 changes: 40 additions & 0 deletions hta/utils/memory_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import pandas as pd


def get_memory_usage_in_MB(df: pd.DataFrame) -> float:
    """Return the total deep memory footprint of a trace dataframe in MB.

    The total is the sum of everything reported by
    ``df.memory_usage(deep=True)``, converted with 1 MB = 1024 * 1024 bytes.

    Args:
        df (pd.DataFrame): The dataframe to measure.

    Returns:
        float: Total memory usage in megabytes.
    """
    bytes_per_mb = 1024 * 1024
    return df.memory_usage(deep=True).sum() / bytes_per_mb


def analyze_memory_usage(df: pd.DataFrame) -> pd.DataFrame:
    """
    Analyze the memory usage of a trace dataframe, column by column.

    Args:
        df (pd.DataFrame): The input dataframe to analyze.

    Returns:
        pd.DataFrame: A new dataframe with one row per column of ``df`` and
        the following columns:
            - "Memory (MB)": deep memory usage of the column in MB
              (1 MB = 1024 * 1024 bytes), rounded to 2 decimals.
            - "Count": number of non-null values in the column.
            - "DType": dtype of the column.
            - "Memory Per Item (B)": bytes per non-null value, truncated to
              an integer; 0 when the column has no non-null values.
    """
    bytes_per_mb = 1024 * 1024

    summary = pd.DataFrame(
        {
            "Memory (MB)": df.memory_usage(deep=True),
            "Count": df.count(),
            "DType": df.dtypes,
        }
    )
    # memory_usage() also reports the dataframe index, which has no matching
    # Count/DType entry; dropping incomplete rows removes that index row.
    summary.dropna(inplace=True)

    memory_bytes = summary["Memory (MB)"]
    counts = summary["Count"].astype(int)

    summary["Memory (MB)"] = memory_bytes / bytes_per_mb
    summary["Count"] = counts
    # A column without any non-null value (all-NaN column, empty dataframe)
    # would divide by zero and make the integer cast raise. Mask those counts
    # and report 0 bytes per item for them instead.
    summary["Memory Per Item (B)"] = (
        (memory_bytes / counts.where(counts > 0)).fillna(0).astype(int)
    )

    return summary.round(2)
27 changes: 27 additions & 0 deletions tests/test_memory_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import unittest

import pandas as pd

from hta.utils.memory_utils import analyze_memory_usage


class TestMemoryUtility(unittest.TestCase):
    """Unit tests for the helpers in ``hta.utils.memory_utils``."""

    def test_analyze_memory_usage(self):
        """Check the per-column memory, count, dtype and bytes-per-item report."""
        import sys

        # With exactly 1 Mi rows, a column's size in MB equals its bytes per item.
        n = 1024 * 1024
        data = {
            "A": [1] * n,
            "B": [2.3] * n,
            # Pin object dtype so the expected size below does not depend on
            # which string dtype this pandas version infers by default.
            "C": pd.Series(["7"] * n, dtype=object),
        }
        df = pd.DataFrame(data)

        # Deep size of an object column is one pointer per row (8 bytes on
        # 64-bit builds) plus the Python str object itself. The str object
        # size differs between CPython versions, so derive it at runtime
        # instead of hard-coding a value.
        str_item_bytes = 8 + sys.getsizeof("7")

        result = analyze_memory_usage(df)
        self.assertIsInstance(result, pd.DataFrame)
        self.assertEqual(
            set(result.columns),
            {"Memory (MB)", "Count", "DType", "Memory Per Item (B)"},
        )
        self.assertListEqual(
            result["Memory (MB)"].tolist(), [8.0, 8.0, float(str_item_bytes)]
        )
        self.assertListEqual(result["Count"].tolist(), [n, n, n])
        self.assertListEqual(result["DType"].tolist(), ["int64", "float64", "object"])
        self.assertListEqual(
            result["Memory Per Item (B)"].tolist(), [8, 8, str_item_bytes]
        )