diff --git a/runtime/core/portable_type/device.h b/runtime/core/portable_type/device.h
index cd15acb0cfe..41a8c6bed50 100644
--- a/runtime/core/portable_type/device.h
+++ b/runtime/core/portable_type/device.h
@@ -26,7 +26,6 @@ enum class DeviceType : int8_t {
 constexpr size_t kNumDeviceTypes = 2;
 
 /// An index representing a specific device; e.g. GPU 0 vs GPU 1.
-/// -1 means the default/unspecified device for that type.
 using DeviceIndex = int8_t;
 
 /**
@@ -41,7 +40,7 @@ struct Device final {
 
   /// Constructs a new `Device` from a `DeviceType` and an optional device
   /// index.
-  /* implicit */ Device(DeviceType type, DeviceIndex index = -1)
+  /* implicit */ Device(DeviceType type, DeviceIndex index = 0)
       : type_(type), index_(index) {}
 
   /// Returns the type of device the tensor data resides on.
@@ -54,7 +53,7 @@ struct Device final {
     return type_ == DeviceType::CPU;
   }
 
-  /// Returns the device index, or -1 if default/unspecified.
+  /// Returns the device index.
   DeviceIndex index() const noexcept {
     return index_;
   }
@@ -69,7 +68,7 @@ struct Device final {
 
  private:
  DeviceType type_;
-  DeviceIndex index_ = -1;
+  DeviceIndex index_ = 0;
 };
 
 } // namespace etensor
diff --git a/runtime/core/portable_type/tensor_impl.cpp b/runtime/core/portable_type/tensor_impl.cpp
index ede5a3d4101..17243fca0fd 100644
--- a/runtime/core/portable_type/tensor_impl.cpp
+++ b/runtime/core/portable_type/tensor_impl.cpp
@@ -50,7 +50,9 @@ TensorImpl::TensorImpl(
     void* data,
     DimOrderType* dim_order,
     StridesType* strides,
-    TensorShapeDynamism dynamism)
+    TensorShapeDynamism dynamism,
+    DeviceType device_type,
+    DeviceIndex device_index)
     : sizes_(sizes),
       dim_order_(dim_order),
       strides_(strides),
@@ -59,7 +61,8 @@ TensorImpl::TensorImpl(
       numel_(compute_numel(sizes, dim)),
       numel_bound_(numel_),
       type_(type),
-      shape_dynamism_(dynamism) {
+      shape_dynamism_(dynamism),
+      device_(device_type, device_index) {
   ET_CHECK_MSG(
      isValid(type_), "Invalid type %" PRId8, static_cast<int8_t>(type_));
  ET_CHECK_MSG(dim_ >= 0,
"Dimension must be non-negative, got %zd", dim_);
diff --git a/runtime/core/portable_type/tensor_impl.h b/runtime/core/portable_type/tensor_impl.h
index 1e2b3620ca2..ea2cde5aeb0 100644
--- a/runtime/core/portable_type/tensor_impl.h
+++ b/runtime/core/portable_type/tensor_impl.h
@@ -10,6 +10,7 @@
 #include
 #include
+#include <executorch/runtime/core/portable_type/device.h>
 #include
 #include
 
@@ -99,6 +100,8 @@ class TensorImpl {
    * @param strides Strides of the tensor at each dimension. Must contain `dim`
    * entries.
    * @param dynamism The mutability of the shape of the tensor.
+   * @param device_type The type of device where tensor data resides.
+   * @param device_index The device index for multi-device scenarios.
    */
   TensorImpl(
       ScalarType type,
@@ -107,7 +110,9 @@ class TensorImpl {
       void* data = nullptr,
       DimOrderType* dim_order = nullptr,
       StridesType* strides = nullptr,
-      TensorShapeDynamism dynamism = TensorShapeDynamism::STATIC);
+      TensorShapeDynamism dynamism = TensorShapeDynamism::STATIC,
+      DeviceType device_type = DeviceType::CPU,
+      DeviceIndex device_index = 0);
 
   /**
    * Returns the size of the tensor in bytes.
@@ -176,6 +181,21 @@ class TensorImpl {
     return shape_dynamism_;
   }
 
+  /// Returns the device where tensor data resides.
+  Device device() const {
+    return device_;
+  }
+
+  /// Returns the type of device where tensor data resides.
+  DeviceType device_type() const {
+    return device_.type();
+  }
+
+  /// Returns the device index, or 0 if default/unspecified.
+  DeviceIndex device_index() const {
+    return device_.index();
+  }
+
   /// Returns a pointer of type T to the constant underlying data blob.
   template <typename T>
   inline const T* data() const {
@@ -261,6 +281,9 @@ class TensorImpl {
 
   /// Specifies the mutability of the shape of the tensor.
   const TensorShapeDynamism shape_dynamism_;
+
+  /// Device where tensor data resides (CPU, CUDA, etc.)
+ Device device_; }; /** diff --git a/runtime/core/portable_type/test/device_test.cpp b/runtime/core/portable_type/test/device_test.cpp index d9359b2f866..c82d82a81b7 100644 --- a/runtime/core/portable_type/test/device_test.cpp +++ b/runtime/core/portable_type/test/device_test.cpp @@ -34,7 +34,7 @@ TEST(DeviceTest, CpuDefaultIndex) { Device d(DeviceType::CPU); EXPECT_TRUE(d.is_cpu()); EXPECT_EQ(d.type(), DeviceType::CPU); - EXPECT_EQ(d.index(), -1); + EXPECT_EQ(d.index(), 0); } TEST(DeviceTest, CpuExplicitIndex) { @@ -49,7 +49,7 @@ TEST(DeviceTest, CudaDefaultIndex) { Device d(DeviceType::CUDA); EXPECT_FALSE(d.is_cpu()); EXPECT_EQ(d.type(), DeviceType::CUDA); - EXPECT_EQ(d.index(), -1); + EXPECT_EQ(d.index(), 0); } TEST(DeviceTest, CudaExplicitIndex) { @@ -83,7 +83,7 @@ TEST(DeviceTest, EqualityDefaultIndices) { TEST(DeviceTest, ImplicitConstructionFromDeviceType) { // Device constructor is implicit, allowing DeviceType → Device conversion. Device d = DeviceType::CUDA; - EXPECT_EQ(d.index(), -1); + EXPECT_EQ(d.index(), 0); } // --- Deprecated namespace aliases --- diff --git a/runtime/core/portable_type/test/tensor_impl_test.cpp b/runtime/core/portable_type/test/tensor_impl_test.cpp index 0b8ae05f4da..7d045da5b3d 100644 --- a/runtime/core/portable_type/test/tensor_impl_test.cpp +++ b/runtime/core/portable_type/test/tensor_impl_test.cpp @@ -21,6 +21,9 @@ using namespace ::testing; using executorch::runtime::ArrayRef; using executorch::runtime::Error; using executorch::runtime::TensorShapeDynamism; +using executorch::runtime::etensor::Device; +using executorch::runtime::etensor::DeviceIndex; +using executorch::runtime::etensor::DeviceType; using executorch::runtime::etensor::ScalarType; using executorch::runtime::etensor::TensorImpl; using SizesType = TensorImpl::SizesType; @@ -449,3 +452,155 @@ TEST_F(TensorImplTest, TestResizingTensorToZeroAndBack) { EXPECT_GT(t.numel(), 0); EXPECT_EQ(t.data(), data); } + +// ============== Size Tests ============== + 
+TEST_F(TensorImplTest, TestTensorImplSize) { + // Verify TensorImpl size hasn't regressed after adding Device member. + // Device (2 bytes) fits within existing padding after type_ and + // shape_dynamism_, so sizeof(TensorImpl) should remain unchanged. + // + // Memory layout (64-bit): + // sizes_ : 8 bytes (pointer) + // dim_order_ : 8 bytes (pointer) + // strides_ : 8 bytes (pointer) + // data_ : 8 bytes (pointer) + // dim_ : 8 bytes (ssize_t) + // numel_ : 8 bytes (ssize_t) + // numel_bound_ : 8 bytes (size_t) + // type_ : 1 byte (ScalarType : int8_t) + // shape_dynamism_ : 1 byte (TensorShapeDynamism : uint8_t) + // device_ : 2 bytes (Device: DeviceType + DeviceIndex) + // padding : 4 bytes (to align struct to 8 bytes) + // Total : 64 bytes + // + // Memory layout (32-bit): + // sizes_ : 4 bytes (pointer) + // dim_order_ : 4 bytes (pointer) + // strides_ : 4 bytes (pointer) + // data_ : 4 bytes (pointer) + // dim_ : 4 bytes (ssize_t) + // numel_ : 4 bytes (ssize_t) + // numel_bound_ : 4 bytes (size_t) + // type_ : 1 byte (ScalarType : int8_t) + // shape_dynamism_ : 1 byte (TensorShapeDynamism : uint8_t) + // device_ : 2 bytes (Device: DeviceType + DeviceIndex) + // Total : 32 bytes (no additional padding needed) + +#if INTPTR_MAX == INT64_MAX + // 64-bit architecture + EXPECT_EQ(sizeof(TensorImpl), 64); +#else + // 32-bit architecture + EXPECT_EQ(sizeof(TensorImpl), 32); +#endif +} + +// ============== Device Tests ============== + +TEST_F(TensorImplTest, TestDefaultDeviceIsCPU) { + // TensorImpl constructed without device parameters should default to CPU + SizesType sizes[2] = {3, 2}; + float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + TensorImpl t(ScalarType::Float, 2, sizes, data); + + EXPECT_EQ(t.device_type(), DeviceType::CPU); + EXPECT_EQ(t.device_index(), 0); + EXPECT_EQ(t.device(), Device(DeviceType::CPU, 0)); +} + +TEST_F(TensorImplTest, TestExplicitCPUDevice) { + // TensorImpl constructed with explicit CPU device + SizesType sizes[2] = {3, 2}; + 
DimOrderType dim_order[2] = {0, 1}; + StridesType strides[2] = {2, 1}; + float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + TensorImpl t( + ScalarType::Float, + 2, + sizes, + data, + dim_order, + strides, + TensorShapeDynamism::STATIC, + DeviceType::CPU, + 0); + + EXPECT_EQ(t.device_type(), DeviceType::CPU); + EXPECT_EQ(t.device_index(), 0); + EXPECT_EQ(t.device(), Device(DeviceType::CPU, 0)); +} + +TEST_F(TensorImplTest, TestCUDADevice) { + // TensorImpl constructed with CUDA device + SizesType sizes[2] = {3, 2}; + DimOrderType dim_order[2] = {0, 1}; + StridesType strides[2] = {2, 1}; + float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + TensorImpl t( + ScalarType::Float, + 2, + sizes, + data, + dim_order, + strides, + TensorShapeDynamism::STATIC, + DeviceType::CUDA, + 0); + + EXPECT_EQ(t.device_type(), DeviceType::CUDA); + EXPECT_EQ(t.device_index(), 0); + EXPECT_EQ(t.device(), Device(DeviceType::CUDA, 0)); +} + +TEST_F(TensorImplTest, TestCUDADeviceMultiGPU) { + // TensorImpl with CUDA device index 1 (second GPU) + SizesType sizes[2] = {3, 2}; + DimOrderType dim_order[2] = {0, 1}; + StridesType strides[2] = {2, 1}; + float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + TensorImpl t( + ScalarType::Float, + 2, + sizes, + data, + dim_order, + strides, + TensorShapeDynamism::STATIC, + DeviceType::CUDA, + 1); + + EXPECT_EQ(t.device_type(), DeviceType::CUDA); + EXPECT_EQ(t.device_index(), 1); + EXPECT_EQ(t.device(), Device(DeviceType::CUDA, 1)); +} + +TEST_F(TensorImplTest, TestDeviceWithDynamicTensor) { + // Device info should work correctly with dynamic tensors + SizesType sizes[2] = {3, 2}; + DimOrderType dim_order[2] = {0, 1}; + StridesType strides[2] = {2, 1}; + float data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + TensorImpl t( + ScalarType::Float, + 2, + sizes, + data, + dim_order, + strides, + TensorShapeDynamism::DYNAMIC_BOUND, + DeviceType::CUDA, + 0); + + EXPECT_EQ(t.device_type(), DeviceType::CUDA); + EXPECT_EQ(t.device_index(), 0); + + // Resize should not affect 
device + SizesType new_sizes[2] = {2, 2}; + Error err = resize_tensor_impl(&t, {new_sizes, 2}); + EXPECT_EQ(err, Error::Ok); + + // Device should remain unchanged after resize + EXPECT_EQ(t.device_type(), DeviceType::CUDA); + EXPECT_EQ(t.device_index(), 0); +}