Skip to content

Commit

Permalink
Hotfix PaddedBatch.is_seq_feature (#101)
Browse files Browse the repository at this point in the history
* fix padded batch for text event time
* add test for padded batch with text event time
  • Loading branch information
ArtyomVorobev authored Feb 10, 2023
1 parent 8dbc589 commit 40b34f4
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 5 deletions.
12 changes: 7 additions & 5 deletions ptls/data_load/padded_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,15 @@ def is_seq_feature(k: str, x):
-------
"""
if not FeatureDict.is_seq_feature(k, x):
return False # target fields
if type(x) is np.ndarray:
if k == 'event_time':
return True
if k.startswith('target'):
return False
if len(x.shape) == 1:
if type(x) is np.ndarray:
return False
if type(x) is torch.Tensor and len(x.shape) == 1:
return False
return True
return True

def drop_seq_features(self):
"""Returns new dict without sequential features
Expand Down
39 changes: 39 additions & 0 deletions ptls_tests/test_data_load/test_padded_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,29 @@ def get_pb():
length=torch.IntTensor([2, 4])
)

def get_pb_with_text_event_time():
    """Build a ``PaddedBatch`` fixture whose ``event_time`` holds text values.

    The payload mixes 1-D torch tensors, numpy arrays and 2-D torch tensors;
    ``event_time`` is a 2x4 numpy array of strings rather than a numeric
    tensor. The batch is created with ``length=[2, 4]``.

    Returns
    -------
    PaddedBatch
        Batch with two samples, used by the ``is_seq_feature`` tests.
    """
    # 2-D payload entries, hoisted so the payload dict stays easy to scan.
    mcc = torch.tensor([
        [1, 2, 0, 0],
        [3, 4, 5, 6],
    ])
    text_event_time = np.array([
        ['0 10:00:05', '0 11:00:00', '2 11:12:10', '2 12:00:00'],
        ['1 10:00:05', '3 11:00:00', '4 11:12:10', '4 12:00:00'],
    ])
    target_array = torch.tensor([
        [1, 2, 2, 5],
        [1, 2, 3, 4],
    ])

    # Key order is kept the same as in the sibling ``get_pb`` style fixtures.
    payload = {
        'bin': torch.IntTensor([0, 1]),
        'target_bin': torch.IntTensor([2, 3]),
        'pp': torch.FloatTensor([0.1, 0.2]),
        'user_id': np.array(['a', 'b']),
        'lists': np.array([[5, 6], [7, 8]]),
        'mcc': mcc,
        'event_time': text_event_time,
        'target_array': target_array,
    }
    seq_lengths = torch.IntTensor([2, 4])
    return PaddedBatch(payload=payload, length=seq_lengths)

def test_padded_batch_example():
data = PaddedBatch(
Expand Down Expand Up @@ -122,6 +145,22 @@ def test_padded_batch_is_seq_feature():
]:
assert is_seq == PaddedBatch.is_seq_feature(col, x.payload[col]), col

# is_seq_feature: same per-column checks, but event_time is a numpy array of strings
def test_padded_batch_is_seq_feature_with_text_event_time():
    """Check ``PaddedBatch.is_seq_feature`` for each column of the text-time batch.

    Only ``mcc`` and ``event_time`` are expected to be reported as sequential;
    every other column (including the ``target``-prefixed ones and the numpy
    arrays) is expected to be reported as non-sequential.
    """
    batch = get_pb_with_text_event_time()

    # Insertion order of the dict fixes the order in which columns are checked.
    expected_flags = {
        'bin': False,
        'target_bin': False,
        'pp': False,
        'user_id': False,
        'lists': False,
        'mcc': True,
        'event_time': True,
        'target_array': False,
    }
    for name, expected in expected_flags.items():
        # The column name is the assertion message so a failure names the column.
        assert expected == PaddedBatch.is_seq_feature(name, batch.payload[name]), name


def test_padded_batch_drop_seq_features():
x = get_pb()
Expand Down

0 comments on commit 40b34f4

Please sign in to comment.