diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp
index 8a2985a284769a..aa067da4f360fd 100644
--- a/src/common/transformations/src/transformations/convert_precision.cpp
+++ b/src/common/transformations/src/transformations/convert_precision.cpp
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "itt.hpp"
+#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
 #include "openvino/op/ops.hpp"
 #include "openvino/pass/constant_folding.hpp"
 #include "openvino/pass/manager.hpp"
@@ -1405,6 +1406,13 @@ bool fuse_type_to_constant(const std::shared_ptr<ov::Node>& node,
         new_const->validate_and_infer_types();
         new_const->set_friendly_name(constant->get_friendly_name());
         ov::copy_runtime_info(constant, new_const);
+
+        const auto& rt_info = node->get_rt_info();
+        auto weightless_caching_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
+        if (weightless_caching_attr != rt_info.end()) {
+            new_const->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] =
+                weightless_caching_attr->second;
+        }
         return true;
     }
     return false;
diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp
index 318f15ab1a64dc..c2b7133506aebe 100644
--- a/src/common/transformations/tests/utils/convert_precision.cpp
+++ b/src/common/transformations/tests/utils/convert_precision.cpp
@@ -13,6 +13,7 @@
 
 #include "common_test_utils/ov_test_utils.hpp"
 #include "openvino/core/model.hpp"
+#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
 #include "openvino/opsets/opset1.hpp"
 #include "openvino/opsets/opset10.hpp"
 #include "openvino/opsets/opset15.hpp"
@@ -2702,3 +2703,38 @@ TEST(TransformationTests, ConvertPrecision_assign_read_value_preserve_orig_types
     FunctionsComparator::Result result = func_comparator(model_ref, model);
     ASSERT_TRUE(result.valid) << result.message;
 }
+
+TEST(TransformationTests, ConvertPrecision_assign_read_value_preserve_weightless_cache_info_as_rt_attribute) {
+    // A single f32 constant tagged with WeightlessCacheAttribute, feeding a Result.
+    auto some_value = opset10::Constant::create(element::f32, Shape{1}, {2});
+    ov::WeightlessCacheAttribute attr(element::f32.size(), 0, element::f32);
+    some_value->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] = attr;
+
+    auto result = std::make_shared<ov::op::v0::Result>(some_value->output(0));
+    auto model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{});
+
+    type_to_fuse_map empty_type_to_fuse_map = {};
+    bool keep_precision_sensitive_in_fp32 = false;
+    bool convert_input_output_precision = false;
+    bool store_original_precision_as_rt_attribute = true;
+    pass::Manager manager;
+    manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
+                                                  empty_type_to_fuse_map,
+                                                  keep_precision_sensitive_in_fp32,
+                                                  convert_input_output_precision,
+                                                  store_original_precision_as_rt_attribute);
+    manager.run_passes(model);
+
+    const auto& ops = model->get_ops();
+    auto it = std::find_if(ops.begin(), ops.end(), [](const std::shared_ptr<Node>& node) {
+        return ov::op::util::is_constant(node);
+    });
+    ASSERT_TRUE(it != ops.end());
+    // Without this check the test would also pass if the constant had never been replaced.
+    ASSERT_EQ((*it)->get_element_type(), element::f16);
+
+    auto& new_rt_info = (*it)->get_rt_info();
+    auto weightless_caching_attr_it = new_rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
+    ASSERT_TRUE(weightless_caching_attr_it != new_rt_info.end());
+    ASSERT_EQ(weightless_caching_attr_it->second.as<ov::WeightlessCacheAttribute>().original_dtype, element::f32);
+}
diff --git a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp
index fedcb030fb52cf..e3cf2609b26c8d 100644
--- a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp
+++ b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp
@@ -5,6 +5,7 @@
 #pragma once
 
 #include "openvino/core/core_visibility.hpp"
+#include "openvino/core/node.hpp"
 #include "openvino/core/runtime_attribute.hpp"
 
 namespace ov {
@@ -25,14 +26,16 @@ class OPENVINO_API WeightlessCacheAttribute : public RuntimeAttribute {
 
     WeightlessCacheAttribute() = delete;
 
-    WeightlessCacheAttribute(size_t original_size, size_t bin_offset)
+    WeightlessCacheAttribute(size_t original_size, size_t bin_offset, ov::element::Type original_dtype)
         : original_size(original_size),
-          bin_offset(bin_offset) {}
+          bin_offset(bin_offset),
+          original_dtype(original_dtype) {}
 
     bool is_copyable() const override;
 
     size_t original_size;
     size_t bin_offset;
+    ov::element::Type original_dtype;
 };
 
 }  // namespace ov
diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp
index 2d1dfba956ea72..d7e250f9916302 100644
--- a/src/frontends/ir/src/ir_deserializer.cpp
+++ b/src/frontends/ir/src/ir_deserializer.cpp
@@ -950,10 +950,12 @@ std::shared_ptr<ov::Node> ov::XmlDeserializer::create_node(const std::vector<ov:
         }
         const auto size = dn.attribute("size");
         const auto offset = dn.attribute("offset");
-        if (size && offset) {
+        const auto element_type = dn.attribute("element_type");
+        if (size && offset && element_type) {
             rtInfo[ov::WeightlessCacheAttribute::get_type_info_static()] =
                 ov::WeightlessCacheAttribute(static_cast<size_t>(pugixml::get_uint64_attr(dn, "size")),
-                                             static_cast<size_t>(pugixml::get_uint64_attr(dn, "offset")));
+                                             static_cast<size_t>(pugixml::get_uint64_attr(dn, "offset")),
+                                             ov::element::Type(pugixml::get_str_attr(dn, "element_type")));
         }
     }
 
diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp
index 461f063ec26bc5..8a9a35b1e92fe9 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp
@@ -4,15 +4,170 @@
 
 #pragma once
 #include <climits>
+#include <algorithm>
 
 #include "intel_gpu/runtime/engine.hpp"
 #include "intel_gpu/runtime/memory.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/util/op_types.hpp"
+#include "openvino/pass/manager.hpp"
 #include "openvino/runtime/shared_buffer.hpp"
 #include "openvino/util/mmap_object.hpp"
 #include "primitive.hpp"
+#include "transformations/convert_precision.hpp"
 
 namespace cldnn {
 
+/// @brief Weights reloaded for weightless caching: the mapped file range and, optionally, a converted constant.
+struct weights_mem {
+    std::shared_ptr<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>> shared_buf = nullptr;
+    std::shared_ptr<ov::op::v0::Constant> transformed_constant = nullptr;
+
+    /// @brief Returns the transformed constant's data when one is set, otherwise the mapped buffer's data.
+    const uint8_t* get_loaded_data() const {
+        OPENVINO_ASSERT(transformed_constant || shared_buf, "weights_mem holds no transformed constant and no buffer");
+        return transformed_constant ? reinterpret_cast<const uint8_t*>(transformed_constant->get_data_ptr())
+                                    : shared_buf->get_ptr<uint8_t>();
+    }
+};
+
+/// @brief Per-constant bookkeeping for weightless caching: records where the weights live in the original
+/// weights file and replays the recorded precision conversion when they are loaded back from that file.
+struct weightless_cache_manager {
+    /// @brief Stores the constant's file location and type info and enables weightless caching for it.
+    void set_constant_info(size_t bin_offset,
+                           size_t original_size,
+                           ov::element::Type original_dtype,
+                           ov::element::Type curr_dtype,
+                           ov::Shape shape) {
+        this->bin_offset = bin_offset;
+        this->original_size = original_size;
+        this->original_dtype = original_dtype;
+        this->curr_dtype = curr_dtype;
+        this->shape = std::move(shape);
+        do_weightless_caching = true;
+        do_precision_conversion = original_dtype != curr_dtype;  // recomputed so a repeated call can reset it
+    }
+
+    /// @brief Turns weightless caching off for this constant; save() then only writes a `false` marker.
+    void invalidate() {
+        do_weightless_caching = false;
+    }
+
+    /// @brief Sets the current element type and re-derives whether a precision conversion is required.
+    void set_new_dtype(ov::element::Type curr_dtype) {
+        this->curr_dtype = curr_dtype;
+        do_precision_conversion = original_dtype != curr_dtype;
+    }
+
+    /// @brief Writes the weightless-caching header to @p ob; @p data_size is currently unused.
+    /// @return true if the header stands in for the weights, false if the caller must write the weights itself.
+    bool save(BinaryOutputBuffer& ob, size_t data_size) const {
+        ob << do_weightless_caching;
+        if (!do_weightless_caching) {
+            return false;
+        }
+
+        ob << bin_offset;
+        ob << do_precision_conversion;
+        if (do_precision_conversion) {
+            ob << original_size;
+            ob << make_data(&original_dtype, sizeof(ov::element::Type));
+            ob << make_data(&curr_dtype, sizeof(ov::element::Type));
+
+            size_t num_dims = shape.size();
+            ob << make_data(&num_dims, sizeof(size_t));
+            ob << make_data(shape.data(), num_dims * sizeof(ov::Shape::value_type));
+        }
+        return true;
+    }
+
+    /// @brief Reads what save() wrote and, for a weightless entry, reloads the weights from @p mapped_weights.
+    /// @return the reloaded weights, or nullptr when the weights are stored inline in @p ib.
+    std::shared_ptr<weights_mem> load(BinaryInputBuffer& ib,
+                                      std::shared_ptr<ov::MappedMemory> mapped_weights,
+                                      size_t data_size) {
+        ib >> do_weightless_caching;
+        if (!do_weightless_caching) {
+            return nullptr;
+        }
+
+        OPENVINO_ASSERT(mapped_weights != nullptr, "mmap object is null");
+
+        ib >> bin_offset;
+        ib >> do_precision_conversion;
+        if (do_precision_conversion) {
+            ib >> original_size;
+            ib >> make_data(&original_dtype, sizeof(ov::element::Type));
+            ib >> make_data(&curr_dtype, sizeof(ov::element::Type));
+
+            size_t num_dims = 0;
+            ib >> make_data(&num_dims, sizeof(size_t));
+            shape.resize(num_dims);
+            ib >> make_data(shape.data(), num_dims * sizeof(ov::Shape::value_type));
+        } else {
+            original_size = data_size;
+        }
+
+        // Reject a weights file that does not match the blob; written so the comparison cannot overflow.
+        OPENVINO_ASSERT(bin_offset <= mapped_weights->size() && original_size <= mapped_weights->size() - bin_offset,
+                        "weights range recorded in the blob is outside of the mapped weights file");
+
+        auto mem_obj = std::make_shared<weights_mem>();
+        mem_obj->shared_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(
+            mapped_weights->data() + bin_offset,
+            original_size,
+            mapped_weights);
+
+        if (should_run_transformations()) {
+            run_transformations(mem_obj);
+        }
+        return mem_obj;
+    }
+
+private:
+    bool do_weightless_caching = false;    // set by set_constant_info(), cleared by invalidate()
+    bool do_precision_conversion = false;  // true when original_dtype differs from curr_dtype
+
+    size_t bin_offset = SIZE_MAX;     // byte offset added to the mapped weights pointer in load()
+    size_t original_size = SIZE_MAX;  // byte length of the mapped range handed to the shared buffer
+    ov::element::Type original_dtype = ov::element::Type_t::undefined;
+    ov::element::Type curr_dtype = ov::element::Type_t::undefined;
+    ov::Shape shape;
+
+    bool should_run_transformations() const {
+        return do_precision_conversion;
+    }
+
+    /// @brief Converts the mapped weights from original_dtype to curr_dtype by running ConvertPrecision on an
+    /// auxiliary single-constant model and stores the resulting constant in @p mem_obj.
+    void run_transformations(const std::shared_ptr<weights_mem>& mem_obj) {
+        auto orig_constant = std::make_shared<ov::op::v0::Constant>(original_dtype,
+                                                                    shape,
+                                                                    mem_obj->shared_buf->get_ptr(),
+                                                                    mem_obj->shared_buf);
+        auto result = std::make_shared<ov::op::v0::Result>(orig_constant->output(0));
+        auto model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{}, "aux");
+
+        ov::pass::Manager manager("Plugin:GPU:weightless_cache_transformations");
+        manager.register_pass<ov::pass::ConvertPrecision>(precisions_map{{original_dtype, curr_dtype}},
+                                                          type_to_fuse_map{},
+                                                          false,  // keep_precision_sensitive_in_fp32
+                                                          false,  // convert_input_output_precision
+                                                          true);  // store_original_precision_as_rt_attribute
+        manager.run_passes(model);
+
+        const auto& ops = model->get_ops();
+        auto it = std::find_if(ops.begin(), ops.end(), [](const std::shared_ptr<ov::Node>& node) {
+            return ov::op::util::is_constant(node);
+        });
+        OPENVINO_ASSERT(it != ops.end());
+        mem_obj->transformed_constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(*it);
+        OPENVINO_ASSERT(mem_obj->transformed_constant != nullptr, "ConvertPrecision did not yield a v0::Constant");
+        OPENVINO_ASSERT(mem_obj->transformed_constant->get_element_type() == curr_dtype);
+    }
+};
+
 /// @brief Provides input data to topology.
 /// @details This primitive allows to pass data which is known at topology creation.
 /// For example, weights and biases for scoring networks.
@@ -20,21 +175,32 @@ namespace cldnn {
 struct data : public primitive_base<data> {
     CLDNN_DECLARE_PRIMITIVE(data)
 
-    data() : primitive_base("", {}) {}
+    data() : primitive_base("", {}) {
+        cache_info = std::make_shared<weightless_cache_manager>();
+    }
 
     /// @brief Constructs data primitive.
     /// @param id This primitive id.
     /// @param mem @ref memory object which contains data.
     /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build.
-    data(const primitive_id& id, memory::ptr mem)
-        : primitive_base(id, {}), mem(std::move(mem)) {}
+    data(const primitive_id& id, memory::ptr mem) : primitive_base(id, {}), mem(std::move(mem)) {
+        cache_info = std::make_shared<weightless_cache_manager>();
+    }
+
+    data(const primitive_id& id, memory::ptr mem, std::shared_ptr<weightless_cache_manager> cache_info)
+        : primitive_base(id, {}),
+          mem(std::move(mem)),
+          cache_info(cache_info) {
+        if (!cache_info) {
+            this->cache_info = std::make_shared<weightless_cache_manager>();
+        }
+    }
 
     /// @brief @ref memory object which contains data.
     /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build.
     memory::ptr mem;
 
-    size_t original_size = SIZE_MAX;
-    size_t bin_offset = SIZE_MAX;
+    std::shared_ptr<weightless_cache_manager> cache_info;
 
     size_t hash() const override {
         size_t seed = primitive::hash();
@@ -53,13 +219,8 @@ struct data : public primitive_base<data> {
         size_t data_size = mem->size();
         ob << make_data(&data_size, sizeof(size_t));
 
-        bool is_cache_without_weights = bin_offset != SIZE_MAX && data_size == original_size;
-
-        if (is_cache_without_weights) {
-            ob << true;
-            ob << bin_offset;
-        } else {
-            ob << false;
+        bool do_weightless_caching = cache_info->save(ob, data_size);
+        if (!do_weightless_caching) {
             if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
                 ob << make_data(mem->buffer_ptr(), data_size);
             } else {
@@ -88,26 +249,12 @@ struct data : public primitive_base<data> {
 
         mem = ib.get_engine().allocate_memory(output_layout, _allocation_type, false);
 
-        bool is_cache_without_weights;
-        ib >> is_cache_without_weights;
-        if (is_cache_without_weights && mapped_weights == nullptr) {
-            OPENVINO_THROW("mmap object is null");
-        }
-
-        std::shared_ptr<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>> shared_buf;
-        if (is_cache_without_weights) {
-            ib >> bin_offset;
-            original_size = data_size;
-
-            shared_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(
-                mapped_weights->data() + bin_offset,
-                data_size,
-                mapped_weights);
-        }
+        auto mem_obj = cache_info->load(ib, mapped_weights, data_size);
+        bool is_weightless_caching_enabled = mem_obj != nullptr;
 
         if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
-            if (is_cache_without_weights) {
-                std::memcpy(reinterpret_cast<uint8_t*>(mem->buffer_ptr()), shared_buf->get_ptr<uint8_t>(), data_size);
+            if (is_weightless_caching_enabled) {
+                std::memcpy(reinterpret_cast<uint8_t*>(mem->buffer_ptr()), mem_obj->get_loaded_data(), data_size);
             } else {
                 ib >> make_data(mem->buffer_ptr(), data_size);
             }
@@ -116,8 +263,8 @@ struct data : public primitive_base<data> {
             auto& strm = ib.get_engine().get_service_stream();
             if (data_size < DATA_BLOCK_SIZE || output_layout.format.is_image_2d()) {
                 std::vector<uint8_t> _buf(data_size);
-                if (is_cache_without_weights) {
-                    std::memcpy(reinterpret_cast<uint8_t*>(_buf.data()), shared_buf->get_ptr<uint8_t>(), data_size);
+                if (is_weightless_caching_enabled) {
+                    std::memcpy(reinterpret_cast<uint8_t*>(_buf.data()), mem_obj->get_loaded_data(), data_size);
                 } else {
                     ib >> make_data(_buf.data(), data_size);
                 }
@@ -135,9 +282,9 @@ struct data : public primitive_base<data> {
                     size_t copy_size =
                         (data_size > (dst_offset + DATA_BLOCK_SIZE)) ? DATA_BLOCK_SIZE : (data_size - dst_offset);
                     if (buf_flag) {
-                        if (is_cache_without_weights) {
+                        if (is_weightless_caching_enabled) {
                             std::memcpy(reinterpret_cast<uint8_t*>(_buf1.data()),
-                                        shared_buf->get_ptr<uint8_t>() + dst_offset,
+                                        mem_obj->get_loaded_data() + dst_offset,
                                         copy_size);
                         } else {
                             ib >> make_data(_buf1.data(), copy_size);
@@ -148,9 +295,9 @@ struct data : public primitive_base<data> {
                         }
                         ev1 = mem->copy_from(strm, _buf1.data(), src_offset, dst_offset, copy_size, is_blocking);
                     } else {
-                        if (is_cache_without_weights) {
+                        if (is_weightless_caching_enabled) {
                             std::memcpy(reinterpret_cast<uint8_t*>(_buf2.data()),
-                                        shared_buf->get_ptr<uint8_t>() + dst_offset,
+                                        mem_obj->get_loaded_data() + dst_offset,
                                         copy_size);
                         } else {
                             ib >> make_data(_buf2.data(), copy_size);
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp
index 85173e9eb33e7c..a4129800733875 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp
@@ -74,11 +74,14 @@ void propagate_constants::run(program& p) {
     // replace all constant nodes which are relevant for inference (either used by non-const user or marked as output)
     // with recomputed cldnn::data
     for (auto& cout : to_replace) {
-        auto& id_to_replace = cout.first;
-        auto mem_impl = cout.second;
-
-        auto const_data =
-            std::make_shared<data>("_cldnn_const_prop_" + id_to_replace, mem_impl /* <<< REMOVE ME WHEN POSSIBLE */);
+        auto& id_to_replace = std::get<0>(cout);
+        auto mem_impl = std::get<1>(cout);
+        auto cache_info = std::get<2>(cout);
+        auto in_layout = std::get<3>(cout);
+
+        auto const_data = std::make_shared<data>("_cldnn_const_prop_" + id_to_replace,
+                                                 mem_impl, /* <<< REMOVE ME WHEN POSSIBLE */
+                                                 cache_info);
         auto& new_node = p.get_or_create(const_data);
         auto& curr_node = p.get_node(id_to_replace);
 
@@ -92,6 +95,25 @@ void propagate_constants::run(program& p) {
             }
         }
 
+        auto is_reorder_with_only_dtype_change = [&](program_node& dst) {
+            if (!in_layout) {
+                return false;
+            }
+            auto& dst_layout = dst.get_output_layout();
+            if (in_layout->data_type == dst_layout.data_type) {
+                return false;
+            }
+
+            auto aux_layout = dst_layout;
+            aux_layout.data_type = in_layout->data_type;
+            return aux_layout == *in_layout.get();
+        };
+        if (is_reorder_with_only_dtype_change(new_node)) {
+            new_node.as<data>().get_primitive()->cache_info->set_new_dtype(new_node.get_output_layout().data_type);
+        } else {
+            new_node.as<data>().get_primitive()->cache_info->invalidate();
+        }
+
         curr_node.dependencies.clear();
         // remove all constant users (as they will be either removed or replaced by cldnn::data which does not have any
         // dependencies)
@@ -113,9 +135,10 @@ bool propagate_constants::has_non_const_user(program_node& node) const {
     return false;
 }
 
-std::list<std::pair<primitive_id, memory::ptr>> propagate_constants::calculate(engine& engine,
-                                                                               const ExecutionConfig& config,
-                                                                               std::shared_ptr<ov::threading::IStreamsExecutor> task_executor) {
+std::list<std::tuple<primitive_id, memory::ptr, std::shared_ptr<weightless_cache_manager>, std::shared_ptr<layout>>>
+propagate_constants::calculate(engine& engine,
+                               const ExecutionConfig& config,
+                               std::shared_ptr<ov::threading::IStreamsExecutor> task_executor) {
     if (!has_non_trivial_constants)
         return {};
 
@@ -123,15 +146,37 @@ std::list<std::pair<primitive_id, memory::ptr>> propagate_constants::calculate(e
     cf_config.set_property(ov::intel_gpu::optimize_data(false));
     cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs));
     network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true);
-    for (auto& cin : const_inputs)
+    std::map<primitive_id, std::pair<std::shared_ptr<weightless_cache_manager>, std::shared_ptr<layout>>>
+        weightless_cache_map;
+    for (auto& cin : const_inputs) {
         net->set_input_data(cin->id(), cin->get_attached_memory_ptr());
 
+        auto users = cin->get_users();
+        if (users.size() == 1 && users.front()->is_type<reorder>()) {
+            auto rprim = users.front()->as<reorder>().get_primitive();
+            auto id = rprim->id;
+            auto cache_ptr = cin->as<data>().get_primitive()->cache_info;
+            auto layout_ptr = std::make_shared<layout>(cin->get_output_layout());
+            weightless_cache_map.emplace(id, std::make_pair(cache_ptr, layout_ptr));
+        }
+    }
+
     net->execute({});
     net->reset_execution(true);  // wait for computations to complete
     auto outputs = net->get_outputs();
 
-    std::list<std::pair<primitive_id, memory::ptr>> ret;
-    for (auto& out : outputs) ret.push_back({out->id(), out->output_memory_ptr()});
+    std::list<std::tuple<primitive_id, memory::ptr, std::shared_ptr<weightless_cache_manager>, std::shared_ptr<layout>>>
+        ret;
+    for (auto& out : outputs) {
+        std::shared_ptr<weightless_cache_manager> cache_ptr = nullptr;
+        std::shared_ptr<layout> layout_ptr = nullptr;
+        auto it = weightless_cache_map.find(out->id());
+        if (it != weightless_cache_map.end()) {
+            cache_ptr = it->second.first;
+            layout_ptr = it->second.second;
+        }
+        ret.push_back({out->id(), out->output_memory_ptr(), cache_ptr, layout_ptr});
+    }
 
     return ret;
 }
diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h
index 490076a37f788e..0b7c3d85c37e27 100644
--- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h
+++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h
@@ -211,9 +211,10 @@ class propagate_constants : public base_pass {
 
 private:
     void run(program& p) override;
-    std::list<std::pair<primitive_id, memory::ptr>> calculate(engine& engine,
-                                                              const ExecutionConfig& config,
-                                                              std::shared_ptr<ov::threading::IStreamsExecutor> task_executor);
+    std::list<std::tuple<primitive_id, memory::ptr, std::shared_ptr<weightless_cache_manager>, std::shared_ptr<layout>>>
+    calculate(engine& engine,
+              const ExecutionConfig& config,
+              std::shared_ptr<ov::threading::IStreamsExecutor> task_executor);
     bool has_non_const_user(program_node& node) const;
     void handle_constant(program& prog, program_node& node);
     void add_constant(program& prog, program_node& node);
diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
index 368e25abe2ddac..a9bb813d0ce587 100644
--- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp
+++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
@@ -14,6 +14,7 @@
 
 #include "intel_gpu/plugin/common_utils.hpp"
 #include "intel_gpu/plugin/program_builder.hpp"
+#include "intel_gpu/primitives/data.hpp"
 #include "intel_gpu/runtime/itt.hpp"
 #include "intel_gpu/runtime/debug_configuration.hpp"
 #include "intel_gpu/primitives/mutable_data.hpp"
@@ -311,11 +312,15 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::pr
     if (this->m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) {
         if (auto data_prim = dynamic_cast<cldnn::data*>(prim.get())) {
             auto rt_info = op.get_rt_info();
+
             auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
             if (weightless_cache_attr != rt_info.end()) {
-                data_prim->bin_offset = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().bin_offset;
-                data_prim->original_size =
-                    weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().original_size;
+                auto& attr = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>();
+                data_prim->cache_info->set_constant_info(attr.bin_offset,
+                                                         attr.original_size,
+                                                         attr.original_dtype,
+                                                         op.get_output_element_type(0),
+                                                         op.get_output_shape(0));
             }
         }
     }
diff --git a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
index 839b2640ca180c..17e1ed6d0a9bbe 100644
--- a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
+++ b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
@@ -8,48 +8,40 @@
 #include "common_test_utils/common_utils.hpp"
 #include "common_test_utils/file_utils.hpp"
 #include "common_test_utils/ov_tensor_utils.hpp"
-#include "common_test_utils/subgraph_builders/2_input_subtract.hpp"
-#include "common_test_utils/subgraph_builders/concat_with_params.hpp"
-#include "common_test_utils/subgraph_builders/conv_bias.hpp"
-#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp"
-#include "common_test_utils/subgraph_builders/conv_pool_relu_no_reshapes.hpp"
-#include "common_test_utils/subgraph_builders/conv_pool_relu_non_zero.hpp"
-#include "common_test_utils/subgraph_builders/convert_transpose.hpp"
-#include "common_test_utils/subgraph_builders/detection_output.hpp"
-#include "common_test_utils/subgraph_builders/kso_func.hpp"
-#include "common_test_utils/subgraph_builders/matmul_bias.hpp"
-#include "common_test_utils/subgraph_builders/multi_single_conv.hpp"
-#include "common_test_utils/subgraph_builders/multiple_input_outpput_double_concat.hpp"
-#include "common_test_utils/subgraph_builders/nested_branch_conv_concat.hpp"
-#include "common_test_utils/subgraph_builders/nested_split_conv_concat.hpp"
 #include "common_test_utils/subgraph_builders/read_concat_split_assign.hpp"
 #include "common_test_utils/subgraph_builders/single_concat_with_constant.hpp"
-#include "common_test_utils/subgraph_builders/single_conv.hpp"
-#include "common_test_utils/subgraph_builders/single_split.hpp"
-#include "common_test_utils/subgraph_builders/split_concat.hpp"
-#include "common_test_utils/subgraph_builders/split_conv_concat.hpp"
-#include "common_test_utils/subgraph_builders/split_multi_conv_concat.hpp"
 #include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp"
 #include "common_test_utils/test_common.hpp"
 #include "openvino/pass/serialize.hpp"
 
 namespace {
-class CheckWeightlessCacheAccuracy : public ::testing::Test,
-                                     public ::testing::WithParamInterface<bool> {
+typedef std::tuple<bool, ov::element::Type, ov::element::Type> testParams;
+
+class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::WithParamInterface<testParams> {
 public:
-    static std::string get_test_case_name(::testing::TestParamInfo<bool> obj) {
-        bool use_compile_model_api = obj.param;
+    static std::string get_test_case_name(::testing::TestParamInfo<testParams> obj) {
+        bool use_compile_model_api_;
+        ov::element::Type inference_mode_;
+        ov::element::Type model_dtype_;
+        std::tie(use_compile_model_api_, inference_mode_, model_dtype_) = obj.param;
 
         std::ostringstream result;
-        result << "use_compile_model_api=" << use_compile_model_api;
+        const char separator = '_';
+        result << "use_compile_model_api=" << use_compile_model_api_ << separator;
+        result << "inference_mode=" << inference_mode_ << separator;
+        result << "model_dtype=" << model_dtype_;
         return result.str();
     }
+
 protected:
     std::shared_ptr<ov::Model> model;
     std::string xml_path;
     std::string bin_path;
     std::string cache_path;
-    bool use_compile_model_api; // for loading from cache
+    std::string cache_dir;
+    bool use_compile_model_api;  // for loading from cache
+    ov::element::Type inference_mode;
+    ov::element::Type model_dtype;
 
     void SetUp() override;
     void TearDown() override;
@@ -61,36 +53,46 @@ void CheckWeightlessCacheAccuracy::SetUp() {
     xml_path = filePrefix + ".xml";
     bin_path = filePrefix + ".bin";
     cache_path = filePrefix + ".blob";
-    use_compile_model_api = GetParam();
+    cache_dir = filePrefix + "_cache_dir";
+
+    std::tie(use_compile_model_api, inference_mode, model_dtype) = GetParam();
 }
 
 void CheckWeightlessCacheAccuracy::TearDown() {
     std::remove(xml_path.c_str());
     std::remove(bin_path.c_str());
     std::remove(cache_path.c_str());
+    // Empty the plugin cache directory (*.blob, *.cl_cache) first, then remove the directory itself.
+    ov::test::utils::removeFilesWithExt(cache_dir, "blob");
+    ov::test::utils::removeFilesWithExt(cache_dir, "cl_cache");
+    ov::test::utils::removeDir(cache_dir);
 }
 
 void CheckWeightlessCacheAccuracy::run() {
-    ov::AnyMap config = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE) };
-    ov::AnyMap config_with_weights_path = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), ov::weights_path(bin_path) };
+    ov::AnyMap config = {ov::cache_dir(cache_dir),
+                         ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
+                         ov::hint::inference_precision(inference_mode)};
+    ov::AnyMap config_with_weights_path = {ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
+                                           ov::weights_path(bin_path),
+                                           ov::hint::inference_precision(inference_mode)};
     auto core = ov::test::utils::PluginCache::get().core();
     ov::pass::Serialize(xml_path, bin_path).run_on_model(model);
 
     ov::CompiledModel compiled_model;
-    OV_ASSERT_NO_THROW(compiled_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config));
+    compiled_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config);
 
-    auto ofstr = std::ofstream(cache_path, std::ofstream::binary);
-    OV_ASSERT_NO_THROW(compiled_model.export_model(ofstr));
-    ofstr.close();
+    if (!use_compile_model_api) {
+        auto ofstr = std::ofstream(cache_path, std::ofstream::binary);
+        compiled_model.export_model(ofstr);
+        ofstr.close();
+    }
 
     auto ifstr = std::ifstream(cache_path, std::ifstream::binary);
     ov::CompiledModel imported_model;
     if (use_compile_model_api) {
-        OV_ASSERT_NO_THROW(imported_model =
-                               core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config));
+        imported_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config);
     } else {
-        OV_ASSERT_NO_THROW(imported_model =
-                               core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config_with_weights_path));
+        imported_model = core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config_with_weights_path);
     }
     ifstr.close();
 
@@ -99,39 +101,57 @@ void CheckWeightlessCacheAccuracy::run() {
 
     for (size_t param_idx = 0; param_idx < model->get_parameters().size(); ++param_idx) {
         auto input = model->get_parameters().at(param_idx);
-        auto tensor = ov::test::utils::create_and_fill_tensor(input->get_element_type(), input->get_shape());
+        auto tensor = ov::test::utils::create_and_fill_tensor_real_distribution(input->get_element_type(),
+                                                                                input->get_shape(),
+                                                                                -100,
+                                                                                100,
+                                                                                param_idx);
         orig_req.set_tensor(input, tensor);
         new_req.set_tensor(input, tensor);
     }
 
-    OV_ASSERT_NO_THROW(orig_req.infer());
-    OV_ASSERT_NO_THROW(new_req.infer());
+    orig_req.infer();
+    new_req.infer();
 
     auto result_vector = model->get_results();
     for (auto& res : result_vector) {
         auto orig_out = orig_req.get_tensor(res);
         auto new_out = new_req.get_tensor(res);
-        ov::test::utils::compare(orig_out, new_out);
+        ov::test::utils::compare(orig_out, new_out, inference_mode);
     }
 }
 
 TEST_P(CheckWeightlessCacheAccuracy, ReadConcatSplitAssign) {
-    model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, ov::element::f16);
-    run();
+    OV_ASSERT_NO_THROW(model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, model_dtype));
+    OV_ASSERT_NO_THROW(run());
 }
 
 TEST_P(CheckWeightlessCacheAccuracy, SingleConcatWithConstant) {
-    model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, ov::element::f16);
-    run();
+    OV_ASSERT_NO_THROW(model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, model_dtype));
+    OV_ASSERT_NO_THROW(run());
 }
 
 TEST_P(CheckWeightlessCacheAccuracy, TiWithLstmCell) {
-    model = ov::test::utils::make_ti_with_lstm_cell(ov::element::f16);
-    run();
+    OV_ASSERT_NO_THROW(model = ov::test::utils::make_ti_with_lstm_cell(model_dtype));
+    OV_ASSERT_NO_THROW(run());
 }
 
-INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy, CheckWeightlessCacheAccuracy,
-                         ::testing::Bool(),
+const std::vector<ov::element::Type> inference_modes = {
+    ov::element::f32,
+    ov::element::f16,
+};
+
+const std::vector<ov::element::Type> model_dtypes = {
+    ov::element::f32,
+    ov::element::f16,
+    ov::element::bf16,
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy,
+                         CheckWeightlessCacheAccuracy,
+                         ::testing::Combine(::testing::Bool(),
+                                            ::testing::ValuesIn(inference_modes),
+                                            ::testing::ValuesIn(model_dtypes)),
                          CheckWeightlessCacheAccuracy::get_test_case_name);
 
 }  // namespace
diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp
index 7abdbcb8c2fc52..7b4f27b5af05b4 100644
--- a/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp
@@ -23,11 +23,11 @@ using namespace ov;
 namespace shape_infer_tests {
 
 struct eltwise_test_params {
-    layout input1_layout;
-    layout input2_layout;
+    cldnn::layout input1_layout;
+    cldnn::layout input2_layout;
     eltwise_mode mode;
     AutoBroadcastSpec auto_broadcast_spec;
-    layout expected_layout;
+    cldnn::layout expected_layout;
     std::vector<tensor> stride;
 };