diff --git a/runtime/core/portable_type/device.h b/runtime/core/portable_type/device.h
index cd15acb0cfe..41a8c6bed50 100644
--- a/runtime/core/portable_type/device.h
+++ b/runtime/core/portable_type/device.h
@@ -26,7 +26,6 @@ enum class DeviceType : int8_t {
 constexpr size_t kNumDeviceTypes = 2;
 
 /// An index representing a specific device; e.g. GPU 0 vs GPU 1.
-/// -1 means the default/unspecified device for that type.
 using DeviceIndex = int8_t;
 
 /**
@@ -41,7 +40,7 @@ struct Device final {
 
   /// Constructs a new `Device` from a `DeviceType` and an optional device
   /// index.
-  /* implicit */ Device(DeviceType type, DeviceIndex index = -1)
+  /* implicit */ Device(DeviceType type, DeviceIndex index = 0)
       : type_(type), index_(index) {}
 
   /// Returns the type of device the tensor data resides on.
@@ -54,7 +53,7 @@ struct Device final {
     return type_ == DeviceType::CPU;
   }
 
-  /// Returns the device index, or -1 if default/unspecified.
+  /// Returns the device index.
   DeviceIndex index() const noexcept {
     return index_;
   }
@@ -69,7 +68,7 @@ struct Device final {
 
  private:
  DeviceType type_;
-  DeviceIndex index_ = -1;
+  DeviceIndex index_ = 0;
 };
 
 } // namespace etensor
diff --git a/runtime/core/portable_type/tensor_impl.cpp b/runtime/core/portable_type/tensor_impl.cpp
index ede5a3d4101..17243fca0fd 100644
--- a/runtime/core/portable_type/tensor_impl.cpp
+++ b/runtime/core/portable_type/tensor_impl.cpp
@@ -50,7 +50,9 @@ TensorImpl::TensorImpl(
     void* data,
     DimOrderType* dim_order,
     StridesType* strides,
-    TensorShapeDynamism dynamism)
+    TensorShapeDynamism dynamism,
+    DeviceType device_type,
+    DeviceIndex device_index)
     : sizes_(sizes),
       dim_order_(dim_order),
       strides_(strides),
@@ -59,7 +61,8 @@ TensorImpl::TensorImpl(
       numel_(compute_numel(sizes, dim)),
       numel_bound_(numel_),
       type_(type),
-      shape_dynamism_(dynamism) {
+      shape_dynamism_(dynamism),
+      device_(device_type, device_index) {
   ET_CHECK_MSG(
      isValid(type_), "Invalid type %" PRId8, static_cast<int8_t>(type_));
  ET_CHECK_MSG(dim_ >= 0,
"Dimension must be non-negative, got %zd", dim_);
diff --git a/runtime/core/portable_type/tensor_impl.h b/runtime/core/portable_type/tensor_impl.h
index 1e2b3620ca2..ea2cde5aeb0 100644
--- a/runtime/core/portable_type/tensor_impl.h
+++ b/runtime/core/portable_type/tensor_impl.h
@@ -10,6 +10,7 @@
 #include
 #include
+#include <executorch/runtime/core/portable_type/device.h>
 #include
 #include
 
@@ -99,6 +100,8 @@ class TensorImpl {
    * @param strides Strides of the tensor at each dimension. Must contain `dim`
    * entries.
    * @param dynamism The mutability of the shape of the tensor.
+   * @param device_type The type of device where tensor data resides.
+   * @param device_index The device index for multi-device scenarios.
    */
   TensorImpl(
       ScalarType type,
@@ -107,7 +110,9 @@ class TensorImpl {
       void* data = nullptr,
       DimOrderType* dim_order = nullptr,
       StridesType* strides = nullptr,
-      TensorShapeDynamism dynamism = TensorShapeDynamism::STATIC);
+      TensorShapeDynamism dynamism = TensorShapeDynamism::STATIC,
+      DeviceType device_type = DeviceType::CPU,
+      DeviceIndex device_index = 0);
 
   /**
    * Returns the size of the tensor in bytes.
@@ -176,6 +181,21 @@ class TensorImpl {
     return shape_dynamism_;
   }
 
+  /// Returns the device where tensor data resides.
+  Device device() const {
+    return device_;
+  }
+
+  /// Returns the type of device where tensor data resides.
+  DeviceType device_type() const {
+    return device_.type();
+  }
+
+  /// Returns the device index, or 0 if default/unspecified.
+  DeviceIndex device_index() const {
+    return device_.index();
+  }
+
   /// Returns a pointer of type T to the constant underlying data blob.
   template <typename T>
   inline const T* data() const {
@@ -261,6 +281,9 @@ class TensorImpl {
 
   /// Specifies the mutability of the shape of the tensor.
   const TensorShapeDynamism shape_dynamism_;
+
+  /// Device where tensor data resides (CPU, CUDA, etc.)
+ Device device_; }; /** diff --git a/runtime/core/portable_type/test/device_test.cpp b/runtime/core/portable_type/test/device_test.cpp index d9359b2f866..c82d82a81b7 100644 --- a/runtime/core/portable_type/test/device_test.cpp +++ b/runtime/core/portable_type/test/device_test.cpp @@ -34,7 +34,7 @@ TEST(DeviceTest, CpuDefaultIndex) { Device d(DeviceType::CPU); EXPECT_TRUE(d.is_cpu()); EXPECT_EQ(d.type(), DeviceType::CPU); - EXPECT_EQ(d.index(), -1); + EXPECT_EQ(d.index(), 0); } TEST(DeviceTest, CpuExplicitIndex) { @@ -49,7 +49,7 @@ TEST(DeviceTest, CudaDefaultIndex) { Device d(DeviceType::CUDA); EXPECT_FALSE(d.is_cpu()); EXPECT_EQ(d.type(), DeviceType::CUDA); - EXPECT_EQ(d.index(), -1); + EXPECT_EQ(d.index(), 0); } TEST(DeviceTest, CudaExplicitIndex) { @@ -83,7 +83,7 @@ TEST(DeviceTest, EqualityDefaultIndices) { TEST(DeviceTest, ImplicitConstructionFromDeviceType) { // Device constructor is implicit, allowing DeviceType → Device conversion. Device d = DeviceType::CUDA; - EXPECT_EQ(d.index(), -1); + EXPECT_EQ(d.index(), 0); } // --- Deprecated namespace aliases --- diff --git a/runtime/core/portable_type/test/tensor_impl_test.cpp b/runtime/core/portable_type/test/tensor_impl_test.cpp index 0b8ae05f4da..7d045da5b3d 100644 --- a/runtime/core/portable_type/test/tensor_impl_test.cpp +++ b/runtime/core/portable_type/test/tensor_impl_test.cpp @@ -21,6 +21,9 @@ using namespace ::testing; using executorch::runtime::ArrayRef; using executorch::runtime::Error; using executorch::runtime::TensorShapeDynamism; +using executorch::runtime::etensor::Device; +using executorch::runtime::etensor::DeviceIndex; +using executorch::runtime::etensor::DeviceType; using executorch::runtime::etensor::ScalarType; using executorch::runtime::etensor::TensorImpl; using SizesType = TensorImpl::SizesType; @@ -449,3 +452,155 @@ TEST_F(TensorImplTest, TestResizingTensorToZeroAndBack) { EXPECT_GT(t.numel(), 0); EXPECT_EQ(t.data(), data); } + +// ============== Size Tests ============== + 
+TEST_F(TensorImplTest, TestTensorImplSize) { + // Verify TensorImpl size hasn't regressed after adding Device member. + // Device (2 bytes) fits within existing padding after type_ and + // shape_dynamism_, so sizeof(TensorImpl) should remain unchanged. + // + // Memory layout (64-bit): + // sizes_ : 8 bytes (pointer) + // dim_order_ : 8 bytes (pointer) + // strides_ : 8 bytes (pointer) + // data_ : 8 bytes (pointer) + // dim_ : 8 bytes (ssize_t) + // numel_ : 8 bytes (ssize_t) + // numel_bound_ : 8 bytes (size_t) + // type_ : 1 byte (ScalarType : int8_t) + // shape_dynamism_ : 1 byte (TensorShapeDynamism : uint8_t) + // device_ : 2 bytes (Device: DeviceType + DeviceIndex) + // padding : 4 bytes (to align struct to 8 bytes) + // Total : 64 bytes + // + // Memory layout (32-bit): + // sizes_ : 4 bytes (pointer) + // dim_order_ : 4 bytes (pointer) + // strides_ : 4 bytes (pointer) + // data_ : 4 bytes (pointer) + // dim_ : 4 bytes (ssize_t) + // numel_ : 4 bytes (ssize_t) + // numel_bound_ : 4 bytes (size_t) + // type_ : 1 byte (ScalarType : int8_t) + // shape_dynamism_ : 1 byte (TensorShapeDynamism : uint8_t) + // device_ : 2 bytes (Device: DeviceType + DeviceIndex) + // Total : 32 bytes (no additional padding needed) + +#if INTPTR_MAX == INT64_MAX + // 64-bit architecture + EXPECT_EQ(sizeof(TensorImpl), 64); +#else + // 32-bit architecture + EXPECT_EQ(sizeof(TensorImpl), 32); +#endif +} + +// ============== Device Tests ============== + +TEST_F(TensorImplTest, TestDefaultDeviceIsCPU) { + // TensorImpl constructed without device parameters should default to CPU + SizesType sizes[2] = {3, 2}; + float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + TensorImpl t(ScalarType::Float, 2, sizes, data); + + EXPECT_EQ(t.device_type(), DeviceType::CPU); + EXPECT_EQ(t.device_index(), 0); + EXPECT_EQ(t.device(), Device(DeviceType::CPU, 0)); +} + +TEST_F(TensorImplTest, TestExplicitCPUDevice) { + // TensorImpl constructed with explicit CPU device + SizesType sizes[2] = {3, 2}; + 
DimOrderType dim_order[2] = {0, 1}; + StridesType strides[2] = {2, 1}; + float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + TensorImpl t( + ScalarType::Float, + 2, + sizes, + data, + dim_order, + strides, + TensorShapeDynamism::STATIC, + DeviceType::CPU, + 0); + + EXPECT_EQ(t.device_type(), DeviceType::CPU); + EXPECT_EQ(t.device_index(), 0); + EXPECT_EQ(t.device(), Device(DeviceType::CPU, 0)); +} + +TEST_F(TensorImplTest, TestCUDADevice) { + // TensorImpl constructed with CUDA device + SizesType sizes[2] = {3, 2}; + DimOrderType dim_order[2] = {0, 1}; + StridesType strides[2] = {2, 1}; + float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + TensorImpl t( + ScalarType::Float, + 2, + sizes, + data, + dim_order, + strides, + TensorShapeDynamism::STATIC, + DeviceType::CUDA, + 0); + + EXPECT_EQ(t.device_type(), DeviceType::CUDA); + EXPECT_EQ(t.device_index(), 0); + EXPECT_EQ(t.device(), Device(DeviceType::CUDA, 0)); +} + +TEST_F(TensorImplTest, TestCUDADeviceMultiGPU) { + // TensorImpl with CUDA device index 1 (second GPU) + SizesType sizes[2] = {3, 2}; + DimOrderType dim_order[2] = {0, 1}; + StridesType strides[2] = {2, 1}; + float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + TensorImpl t( + ScalarType::Float, + 2, + sizes, + data, + dim_order, + strides, + TensorShapeDynamism::STATIC, + DeviceType::CUDA, + 1); + + EXPECT_EQ(t.device_type(), DeviceType::CUDA); + EXPECT_EQ(t.device_index(), 1); + EXPECT_EQ(t.device(), Device(DeviceType::CUDA, 1)); +} + +TEST_F(TensorImplTest, TestDeviceWithDynamicTensor) { + // Device info should work correctly with dynamic tensors + SizesType sizes[2] = {3, 2}; + DimOrderType dim_order[2] = {0, 1}; + StridesType strides[2] = {2, 1}; + float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + TensorImpl t( + ScalarType::Float, + 2, + sizes, + data, + dim_order, + strides, + TensorShapeDynamism::DYNAMIC_BOUND, + DeviceType::CUDA, + 0); + + EXPECT_EQ(t.device_type(), DeviceType::CUDA); + EXPECT_EQ(t.device_index(), 0); + + // Resize should not affect 
device + SizesType new_sizes[2] = {2, 2}; + Error err = resize_tensor_impl(&t, {new_sizes, 2}); + EXPECT_EQ(err, Error::Ok); + + // Device should remain unchanged after resize + EXPECT_EQ(t.device_type(), DeviceType::CUDA); + EXPECT_EQ(t.device_index(), 0); +}