@@ -832,6 +832,9 @@ void ExposeTensor(py::module &m) {
832832 },
833833 R"code( Passthrough, since the object is already an instance of `TensorCPU`.)code" ,
834834 py::return_value_policy::reference_internal)
835+ .def (" _set_stream" , [](Tensor<CPUBackend> &t, py::object stream) {
836+ t.set_order (AccessOrderFromPythonStreamObj (stream));
837+ })
835838 .def (" _make_copy" , [](const Tensor<CPUBackend> &t) {
836839 auto dst = std::make_unique<Tensor<CPUBackend>>();
837840 dst->set_device_id (t.device_id ());
@@ -997,6 +1000,7 @@ void ExposeTensor(py::module &m) {
9971000 DeviceGuard g (t.device_id ());
9981001 auto ret = std::make_unique<Tensor<CPUBackend>>();
9991002 ret->set_pinned (false );
1003+ ret->set_order (AccessOrder::host ());
10001004 UserStream * us = UserStream::Get ();
10011005 cudaStream_t s = us->GetStream (t);
10021006 ret->Copy (t, s);
@@ -1007,6 +1011,9 @@ void ExposeTensor(py::module &m) {
10071011 Returns a `TensorCPU` object being a copy of this `TensorGPU`.
10081012 )code" ,
10091013 py::return_value_policy::take_ownership)
1014+ .def (" _set_stream" , [](Tensor<GPUBackend> &t, py::object stream) {
1015+ t.set_order (AccessOrderFromPythonStreamObj (stream));
1016+ })
10101017 .def (" _make_copy" , [](const Tensor<GPUBackend> &t) {
10111018 DeviceGuard dg (t.device_id ());
10121019 auto dst = std::make_unique<Tensor<GPUBackend>>();
@@ -1112,7 +1119,9 @@ std::unique_ptr<Tensor<Backend> > TensorListGetItemImpl(TensorList<Backend> &t,
11121119 auto ptr = std::make_unique<Tensor<Backend>>();
11131120 // TODO(klecki): Rework this with proper sample-based tensor batch data structure
11141121 auto &sample_shared_ptr = unsafe_sample_owner (t, id);
1115- ptr->ShareData (sample_shared_ptr, t.capacity (), t.is_pinned (), t.shape ()[id], t.type (),
1122+ auto &tshape = t.tensor_shape (id);
1123+ size_t num_bytes = tshape.num_elements () * t.type_info ().size ();
1124+ ptr->ShareData (sample_shared_ptr, num_bytes, t.is_pinned (), tshape, t.type (),
11161125 t.device_id (), t.order (), t.ready_event ());
11171126 ptr->SetMeta (t.GetMeta (id));
11181127 return ptr;
@@ -1360,6 +1369,9 @@ void ExposeTensorListCPU(py::module &m) {
13601369 return t;
13611370 }, R"code( Passthrough, as it is already an instance of `TensorListCPU`.)code" ,
13621371 py::return_value_policy::reference_internal)
1372+ .def (" _set_stream" , [](TensorList<CPUBackend> &t, py::object stream) {
1373+ t.set_order (AccessOrderFromPythonStreamObj (stream));
1374+ })
13631375 .def (" _make_copy" , [](const TensorList<CPUBackend> &t) {
13641376 auto dst = std::make_shared<TensorList<CPUBackend>>();
13651377 dst->set_device_id (t.device_id ());
@@ -1625,6 +1637,7 @@ void ExposeTesorListGPU(py::module &m) {
16251637 DeviceGuard g (t.device_id ());
16261638 auto ret = std::make_shared<TensorList<CPUBackend>>();
16271639 ret->set_pinned (false );
1640+ ret->set_order (AccessOrder::host ());
16281641 ret->SetContiguity (BatchContiguity::Contiguous);
16291642 UserStream * us = UserStream::Get ();
16301643 cudaStream_t s = us->GetStream (t);
@@ -1636,6 +1649,9 @@ void ExposeTesorListGPU(py::module &m) {
16361649 Returns a `TensorListCPU` object being a copy of this `TensorListGPU`.
16371650 )code" ,
16381651 py::return_value_policy::take_ownership)
1652+ .def (" _set_stream" , [](TensorList<GPUBackend> &t, py::object stream) {
1653+ t.set_order (AccessOrderFromPythonStreamObj (stream));
1654+ })
16391655 .def (" _make_copy" , [](const TensorList<GPUBackend> &tl) {
16401656 DeviceGuard dg (tl.device_id ());
16411657 auto dst = std::make_shared<TensorList<GPUBackend>>();
0 commit comments