diff --git a/common/wrappers/copyable-atomic.h b/common/wrappers/copyable-atomic.h
index 2d767be543..e9f213d516 100644
--- a/common/wrappers/copyable-atomic.h
+++ b/common/wrappers/copyable-atomic.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <atomic>
+#include <type_traits>
 
 namespace vk {
 
@@ -32,4 +33,29 @@ class copyable_atomic : std::atomic<T> {
   // Add other operators if it is required
 };
 
+template<class T, class U = std::enable_if_t<std::is_integral_v<T>>> // the default for U is ill-formed unless T is an integral type
+struct copyable_atomic_integral : std::atomic<T> { // std::atomic<T> for integral T that can additionally be copied
+  using std::atomic<T>::atomic; // inherit the value constructors of std::atomic<T>
+  using std::atomic<T>::operator=; // keep assignment from a plain T visible next to the copy assignment declared below
+  using std::atomic<T>::store;
+  using std::atomic<T>::load;
+  using std::atomic<T>::exchange;
+  using std::atomic<T>::operator T;
+  using std::atomic<T>::compare_exchange_strong;
+  using std::atomic<T>::compare_exchange_weak;
+
+  // integral-only read-modify-write operations
+  using std::atomic<T>::fetch_add;
+  using std::atomic<T>::fetch_sub;
+
+  copyable_atomic_integral(const copyable_atomic_integral &other) : // copy = construct from a snapshot of other's current value
+    std::atomic<T>(other.load()) {
+  }
+
+  copyable_atomic_integral& operator=(copyable_atomic_integral other) { // the by-value parameter is itself made via the copy constructor above
+    *this = other.load(); // picks std::atomic<T>::operator=(T): an atomic store of the snapshot (load + store, not one atomic step)
+    return *this;
+  }
+};
+
 } // namespace vk
diff --git a/compiler/compiler-core.cpp b/compiler/compiler-core.cpp
index 3e3bf03b6d..2e54c2edd1 100644
--- a/compiler/compiler-core.cpp
+++ b/compiler/compiler-core.cpp
@@ -490,11 +490,11 @@ VarPtr CompilerCore::create_var(const std::string &name, VarData::Type type) {
 VarPtr CompilerCore::get_global_var(const std::string &name, VertexPtr init_val) {
   auto *node = globals_ht.at(vk::std_hash(name));
 
-  if (!node->data) {
+  {
     AutoLocker<Lockable *> locker(node);
     if (!node->data) {
       node->data = create_var(name, VarData::var_global_t);
-      node->data->init_val = init_val;
+      node->data->init_val = init_val.clone();
       node->data->is_builtin_runtime = VarData::does_name_eq_any_builtin_runtime(name);
     }
   }
@@ -505,11 +505,11 @@ VarPtr CompilerCore::get_global_var(const std::string &name, VertexPtr init_val)
 VarPtr CompilerCore::get_constant_var(const std::string &name, VertexPtr init_val, bool *is_new_inserted) {
   auto *node = constants_ht.at(vk::std_hash(name));
   VarPtr new_var;
-  if (!node->data) {
+  { 
     AutoLocker<Lockable *> locker(node);
     if (!node->data) {
       new_var = create_var(name, VarData::var_const_t);
-      new_var->init_val = init_val;
+      new_var->init_val = init_val.clone();
       node->data = new_var;
     }
   }
diff --git a/compiler/compiler-core.h b/compiler/compiler-core.h
index c98d4c64ef..7d17abe3ef 100644
--- a/compiler/compiler-core.h
+++ b/compiler/compiler-core.h
@@ -8,6 +8,7 @@
 /*** Core ***/
 //Consists mostly of functions that require synchronization
 
+#include <atomic>
 #include <string>
 #include <vector>
 
@@ -61,7 +62,7 @@ class CompilerCore {
   std::vector<std::string> kphp_runtime_opts;
   std::vector<std::string> exclude_namespaces;
   bool is_untyped_rpc_tl_used{false};
-  bool is_functions_txt_parsed{false};
+  std::atomic_bool is_functions_txt_parsed{false};
   function_palette::Palette function_palette;
 
   inline bool try_require_file(SrcFilePtr file);
@@ -177,11 +178,11 @@ class CompilerCore {
   }
 
   void set_functions_txt_parsed() {
-    is_functions_txt_parsed = true;
+    is_functions_txt_parsed.store(true, std::memory_order_seq_cst); // atomic write of the flag; seq_cst is also std::atomic's default ordering
   }
 
   bool get_functions_txt_parsed() const {
-    return is_functions_txt_parsed;
+    return is_functions_txt_parsed.load(std::memory_order_seq_cst); // atomic read of the flag set by set_functions_txt_parsed()
   }
 
   bool is_output_mode_server() const {
diff --git a/compiler/data/class-members.cpp b/compiler/data/class-members.cpp
index 434b62df58..aca1de0e85 100644
--- a/compiler/data/class-members.cpp
+++ b/compiler/data/class-members.cpp
@@ -60,7 +60,7 @@ inline ClassMemberStaticField::ClassMemberStaticField(ClassPtr klass, VertexAdap
   std::string global_var_name = replace_backslashes(klass->name) + "$$" + root->get_string();
   var = G->get_global_var(global_var_name, def_val);
   root->var_id = var;
-  var->init_val = def_val;
+  var->init_val = def_val.clone();
   var->class_id = klass;
 }
 
@@ -101,7 +101,7 @@ ClassMemberInstanceField::ClassMemberInstanceField(ClassPtr klass, VertexAdaptor
   std::string local_var_name = root->get_string();
   var = G->create_var(local_var_name, VarData::var_instance_t);
   root->var_id = var;
-  var->init_val = def_val;
+  var->init_val = def_val.clone();
   var->class_id = klass;
   var->marked_as_const = klass->is_immutable || (phpdoc && phpdoc->has_tag(PhpDocType::kphp_const));
 }
diff --git a/compiler/data/var-data.h b/compiler/data/var-data.h
index 17599dddf8..b9ea0d53fc 100644
--- a/compiler/data/var-data.h
+++ b/compiler/data/var-data.h
@@ -7,6 +7,7 @@
 #include <cstdint>
 #include <string>
 
+#include "common/wrappers/copyable-atomic.h"
 #include "compiler/data/class-members.h"
 #include "compiler/debug.h"
 #include "compiler/inferring/var-node.h"
@@ -45,14 +46,18 @@ class VarData {
   bool marked_as_const = false;
   bool is_read_only = true;
   bool is_foreach_reference = false;
-  bool is_builtin_runtime = false;        // $_SERVER, $argv, etc., see PhpScriptBuiltInSuperGlobals in runtime
-  int dependency_level = 0;               // for constants only (c_str$, c_arr$, etc)
-  int offset_in_linear_mem = -1;          // for globals only (offset in g_linear_mem)
-  int batch_idx = -1;                     // for constants and globals, a number [0;N), see const-globals-batched-mem.h
+  bool is_builtin_runtime = false;               // $_SERVER, $argv, etc., see PhpScriptBuiltInSuperGlobals in runtime
+  vk::copyable_atomic<int> dependency_level = 0; // for constants only (c_str$, c_arr$, etc)
+  int offset_in_linear_mem = -1;                 // for globals only (offset in g_linear_mem)
+  int batch_idx = -1;                            // for constants and globals, a number [0;N), see const-globals-batched-mem.h
 
   void set_uninited_flag(bool f);
   bool get_uninited_flag();
 
+  VarData(const VarData &) = default;
+  VarData(VarData &&) = default;
+  VarData &operator=(const VarData &) = default;
+  VarData &operator=(VarData &&) = default;
   explicit VarData(Type type);
 
   inline Type &type() { return type_; }
diff --git a/compiler/gentree.cpp b/compiler/gentree.cpp
index 43fc1f6c47..5cb32f4732 100644
--- a/compiler/gentree.cpp
+++ b/compiler/gentree.cpp
@@ -1607,6 +1607,8 @@ VertexAdaptor<op_function> GenTree::get_function(bool is_lambda, const PhpDocCom
     cur_function->root->cmd_ref() = VertexAdaptor<op_seq>::create();
   }
 
+  auto resp = cur_function->root;
+
   // the function is ready, register it;
   // the constructor is registered later, after the entire class is parsed
   if (!cur_function->is_constructor()) {
@@ -1616,10 +1618,10 @@ VertexAdaptor<op_function> GenTree::get_function(bool is_lambda, const PhpDocCom
                         || cur_function->modifiers.is_instance()
                         || cur_function->is_lambda()
                         || kphp_required_flag;
-    G->register_and_require_function(cur_function, parsed_os, auto_require);
+    G->register_and_require_function(cur_function, parsed_os, auto_require); // pass function further
   }
 
-  return cur_function->root;
+  return resp;
 }
 
 bool GenTree::check_seq_end() {
diff --git a/compiler/inferring/node.cpp b/compiler/inferring/node.cpp
index 42ef98839a..6ba553d64a 100644
--- a/compiler/inferring/node.cpp
+++ b/compiler/inferring/node.cpp
@@ -1,16 +1,16 @@
 // Compiler for PHP (aka KPHP)
-// Copyright (c) 2020 LLC «V Kontakte»
+// Copyright (c) 2024 LLC «V Kontakte»
 // Distributed under the GPL v3 License, see LICENSE.notice.txt
 
 #include "compiler/inferring/node.h"
 
 #include "compiler/inferring/type-data.h"
-#include "compiler/stage.h"
+#include <atomic>
 
 namespace tinf {
 
 std::string Node::as_human_readable() const {
-  return type_->as_human_readable(false);
+  return type_.load(std::memory_order_relaxed)->as_human_readable(false); // relaxed: only the pointer read itself is atomic, no ordering is requested
 }
 
 void Node::register_edge_from_this(const tinf::Edge *edge) {
@@ -28,16 +28,20 @@ bool Node::try_start_recalc() {
     int recalc_state_copy = recalc_state_;
     int once_finished_flag = recalc_state_copy & recalc_bit_at_least_once;
     switch (recalc_state_copy & 15) {   // preserve bit 16 in transformations
-      case recalc_st_waiting:
-        if (__sync_bool_compare_and_swap(&recalc_state_, recalc_st_waiting | once_finished_flag, recalc_st_need_relaunch | once_finished_flag)) {
+      case recalc_st_waiting: {
+        int old = recalc_st_waiting | once_finished_flag;
+        if (recalc_state_.compare_exchange_strong(old, recalc_st_need_relaunch | once_finished_flag)) {
           return true;
         }
         break;
-      case recalc_st_processing:
-        if (__sync_bool_compare_and_swap(&recalc_state_, recalc_st_processing | once_finished_flag, recalc_st_need_relaunch | once_finished_flag)) {
+      }
+      case recalc_st_processing: {
+        int old = recalc_st_processing | once_finished_flag;
+        if (recalc_state_.compare_exchange_strong(old, recalc_st_need_relaunch | once_finished_flag)) {
           return false;
         }
         break;
+      }
       case recalc_st_need_relaunch:
         return false;
       default:
@@ -49,7 +53,8 @@ bool Node::try_start_recalc() {
 
 void Node::start_recalc() {
   int once_finished_flag = recalc_state_ & recalc_bit_at_least_once;  // preserve bit 16 in transformation
-  bool swapped = __sync_bool_compare_and_swap(&recalc_state_, recalc_st_need_relaunch | once_finished_flag, recalc_st_processing | once_finished_flag);
+  int old = recalc_st_need_relaunch | once_finished_flag; // expected state: need_relaunch plus the at-least-once bit as just observed
+  bool swapped = recalc_state_.compare_exchange_strong(old, recalc_st_processing | once_finished_flag); // need_relaunch -> processing in one atomic step
   kphp_assert(swapped);
 }
 
@@ -57,21 +62,27 @@ bool Node::try_finish_recalc() {
   while (true) {
     int recalc_state_copy = recalc_state_;
     switch (recalc_state_copy) {  // always set bit 16 in transformations
-      case recalc_st_processing:
-        if (__sync_bool_compare_and_swap(&recalc_state_, recalc_st_processing, recalc_st_waiting | recalc_bit_at_least_once)) {
+      case recalc_st_processing: {
+        int old = recalc_st_processing;
+        if (recalc_state_.compare_exchange_strong(old, recalc_st_waiting | recalc_bit_at_least_once)) {
           return true;
         }
         break;
-      case recalc_st_processing | recalc_bit_at_least_once:
-        if (__sync_bool_compare_and_swap(&recalc_state_, recalc_st_processing | recalc_bit_at_least_once, recalc_st_waiting | recalc_bit_at_least_once)) {
+      }
+      case recalc_st_processing | recalc_bit_at_least_once: {
+        int old = recalc_st_processing | recalc_bit_at_least_once;
+        if (recalc_state_.compare_exchange_strong(old, recalc_st_waiting | recalc_bit_at_least_once)) {
           return true;
         }
         break;
-      case recalc_st_need_relaunch:
-        if (__sync_bool_compare_and_swap(&recalc_state_, recalc_st_need_relaunch, recalc_st_need_relaunch | recalc_bit_at_least_once)) {
+      }
+      case recalc_st_need_relaunch: {
+        int old = recalc_st_need_relaunch;
+        if (recalc_state_.compare_exchange_strong(old, recalc_st_need_relaunch | recalc_bit_at_least_once)) {
           return false; // false here, unlike above, but like below
         }
         break;
+      }
       case recalc_st_need_relaunch | recalc_bit_at_least_once:
         return false;
       default:
diff --git a/compiler/inferring/node.h b/compiler/inferring/node.h
index 84e1e9bd80..27495e067b 100644
--- a/compiler/inferring/node.h
+++ b/compiler/inferring/node.h
@@ -4,12 +4,14 @@
 
 #pragma once
 
+#include <atomic>
 #include <string>
 #include <forward_list>
 
+#include "common/wrappers/copyable-atomic.h"
 #include "compiler/debug.h"
-#include "compiler/location.h"
 #include "compiler/inferring/type-data.h"
+#include "compiler/location.h"
 #include "compiler/threading/locks.h"
 
 namespace tinf {
@@ -39,11 +41,11 @@ class Node : public Lockable {
     recalc_bit_at_least_once = 16,
   };
 
-  const TypeData *type_{TypeData::get_type(tp_any)};
+  vk::copyable_atomic<const TypeData *> type_{TypeData::get_type(tp_any)};
 
   // this field is a finite-state automation for multithreading synchronization, see enum above
   // if should be placed here (after TypeData*) to make it join with the next int field in memory
-  int recalc_state_{recalc_st_waiting};
+  vk::copyable_atomic<int> recalc_state_{recalc_st_waiting};
 
 public:
 
@@ -54,11 +56,11 @@ class Node : public Lockable {
   std::string as_human_readable() const;
 
   bool was_recalc_started_at_least_once() const {
-    return recalc_state_ > recalc_st_waiting;
+    return recalc_state_.load() > recalc_st_waiting; // explicit atomic load (default ordering) of the state word
   }
 
   bool was_recalc_finished_at_least_once() const {
-    return recalc_state_ >= recalc_bit_at_least_once;
+    return recalc_state_.load() >= recalc_bit_at_least_once; // true once the at-least-once bit (16) is part of the state value
   }
 
   void register_edge_from_this(const tinf::Edge *edge);
@@ -77,11 +79,11 @@ class Node : public Lockable {
   bool try_finish_recalc();
 
   const TypeData *get_type() const {
-    return type_;
+    return type_.load(std::memory_order_relaxed); // atomic read of the pointer only; no ordering with other memory is requested
   }
 
   void set_type(const TypeData *type) {
-    type_ = type;
+    type_.store(type, std::memory_order_relaxed); // atomic write of the pointer only; no ordering with other memory is requested
   }
 
   virtual void recalc(TypeInferer *inferer) = 0;
diff --git a/compiler/inferring/type-node.cpp b/compiler/inferring/type-node.cpp
index 038f8d1a34..cf196e06af 100644
--- a/compiler/inferring/type-node.cpp
+++ b/compiler/inferring/type-node.cpp
@@ -1,14 +1,15 @@
 // Compiler for PHP (aka KPHP)
-// Copyright (c) 2020 LLC «V Kontakte»
+// Copyright (c) 2024 LLC «V Kontakte»
 // Distributed under the GPL v3 License, see LICENSE.notice.txt
 
-#include "compiler/inferring/type-node.h"
+#include <atomic>
 
 #include "compiler/inferring/type-data.h"
+#include "compiler/inferring/type-node.h"
 #include "compiler/stage.h"
 
 std::string tinf::TypeNode::get_description() {
-  return "TypeNode at " + location_.as_human_readable() + " : " + type_->as_human_readable();
+  return "TypeNode at " + location_.as_human_readable() + " : " + type_.load(std::memory_order_relaxed)->as_human_readable(); // type_ is atomic now, so it is read through an explicit relaxed load
 }
 
 const Location &tinf::TypeNode::get_location() const {
diff --git a/compiler/pipes/collect-const-vars.cpp b/compiler/pipes/collect-const-vars.cpp
index e0d774d625..f50880264d 100644
--- a/compiler/pipes/collect-const-vars.cpp
+++ b/compiler/pipes/collect-const-vars.cpp
@@ -4,6 +4,8 @@
 
 #include "compiler/pipes/collect-const-vars.h"
 
+#include <atomic>
+
 #include "compiler/data/src-file.h"
 #include "compiler/vertex-util.h"
 #include "compiler/data/var-data.h"
@@ -218,10 +220,10 @@ int get_expr_dep_level(VertexPtr vertex) {
 }
 
 void set_var_dep_level(VarPtr var_id) {
-  if (!IsComposite::visit(var_id->init_val)) {
-    var_id->dependency_level = 0;
-  } else {
-    var_id->dependency_level = 1 + get_expr_dep_level(var_id->init_val);
+  if (IsComposite::visit(var_id->init_val)) { // only composite init values raise the level; otherwise the current level is left untouched
+    const int cur = 1 + get_expr_dep_level(var_id->init_val); // level required by this init value
+    int old = var_id->dependency_level.load(std::memory_order_relaxed);
+    while (old < cur && !var_id->dependency_level.compare_exchange_strong(old, cur)) {} // atomic max: a failed CAS refreshes `old`, so a concurrent larger level is never overwritten
   }
 }
 
diff --git a/compiler/pipes/collect-required-and-classes.cpp b/compiler/pipes/collect-required-and-classes.cpp
index 761f585828..7b5f0d7c0e 100644
--- a/compiler/pipes/collect-required-and-classes.cpp
+++ b/compiler/pipes/collect-required-and-classes.cpp
@@ -31,7 +31,7 @@ class CollectRequiredPass final : public FunctionPassBase {
     // avoid a race condition, when we try to search for RpcFunction.php and other built-in classes that are visible from index.php
     // (if such files exist, extra src_xxx$called variables will be created: unstable codegeneration)
     while (!G->get_functions_txt_parsed()) {
-      usleep(100000);
+      usleep(100000); // TODO good place for condvar
     }
 
     if (G->get_class(class_name)) {
diff --git a/compiler/pipes/register-variables.cpp b/compiler/pipes/register-variables.cpp
index 30b8f80290..81eff03d9e 100644
--- a/compiler/pipes/register-variables.cpp
+++ b/compiler/pipes/register-variables.cpp
@@ -77,7 +77,7 @@ void RegisterVariablesPass::register_function_static_var(VertexAdaptor<op_var> v
 
   if (default_value) {
     if (!kphp_error(is_const(default_value), fmt_format("Default value of [{}] is not constant", name))) {
-      var->init_val = default_value;
+      var->init_val = default_value.clone();
     }
   }
   var_vertex->var_id = var;
@@ -91,7 +91,7 @@ void RegisterVariablesPass::register_param_var(VertexAdaptor<op_var> var_vertex,
   kphp_assert (var);
   if (default_value) {
     kphp_error_return(is_const(default_value) || current_function->is_extern(), fmt_format("Default value of [{}] is not constant", name));
-    var->init_val = default_value;
+    var->init_val = default_value.clone();
   }
   var_vertex->var_id = var;
 }
diff --git a/compiler/pipes/sort-and-inherit-classes.h b/compiler/pipes/sort-and-inherit-classes.h
index d23e51bac3..8266f9a64c 100644
--- a/compiler/pipes/sort-and-inherit-classes.h
+++ b/compiler/pipes/sort-and-inherit-classes.h
@@ -6,6 +6,7 @@
 
 #include <forward_list>
 
+#include "common/wrappers/copyable-atomic.h"
 #include "compiler/data/class-members.h"
 #include "compiler/data/data_ptr.h"
 #include "compiler/threading/data-stream.h"
@@ -14,7 +15,7 @@
 class SortAndInheritClassesF {
 private:
   struct wait_list {
-    bool done;
+    vk::copyable_atomic<bool> done{false}; // explicit initial value: a default-constructed std::atomic is not guaranteed to hold false before C++20
     std::forward_list<ClassPtr> waiting;
   };
 
diff --git a/compiler/scheduler/scheduler-base.cpp b/compiler/scheduler/scheduler-base.cpp
index 1ab866cba5..987aab2ac1 100644
--- a/compiler/scheduler/scheduler-base.cpp
+++ b/compiler/scheduler/scheduler-base.cpp
@@ -4,9 +4,10 @@
 
 #include "compiler/scheduler/scheduler-base.h"
 
+#include <atomic>
 #include <cassert>
 
-volatile int tasks_before_sync_node;
+std::atomic_int tasks_before_sync_node = 0;
 
 static SchedulerBase *scheduler;
 
diff --git a/compiler/scheduler/scheduler-base.h b/compiler/scheduler/scheduler-base.h
index f5d7c3fbc9..4b635e3317 100644
--- a/compiler/scheduler/scheduler-base.h
+++ b/compiler/scheduler/scheduler-base.h
@@ -4,6 +4,8 @@
 
 #pragma once
 
+#include <atomic>
+
 class Node;
 
 class Task;
@@ -22,7 +24,7 @@ SchedulerBase *get_scheduler();
 void set_scheduler(SchedulerBase *new_scheduler);
 void unset_scheduler(SchedulerBase *old_scheduler);
 
-extern volatile int tasks_before_sync_node;
+extern std::atomic_int tasks_before_sync_node;
 
 inline void register_async_task(Task *task) {
   get_scheduler()->add_task(task);
diff --git a/compiler/scheduler/scheduler.cpp b/compiler/scheduler/scheduler.cpp
index 633db5dea0..8410efb36e 100644
--- a/compiler/scheduler/scheduler.cpp
+++ b/compiler/scheduler/scheduler.cpp
@@ -4,8 +4,10 @@
 
 #include "compiler/scheduler/scheduler.h"
 
+#include <atomic>
 #include <vector>
 
+#include "compiler/scheduler/scheduler-base.h"
 #include "compiler/scheduler/task.h"
 #include "compiler/threading/thread-id.h"
 #include "compiler/threading/tls.h"
@@ -18,7 +20,7 @@ class ThreadContext {
   class Scheduler *scheduler;
 
   Node *node;
-  bool run_flag;
+  std::atomic_bool run_flag;
 };
 
 
@@ -59,7 +61,7 @@ void Scheduler::execute() {
   for (int i = 1; i <= threads_count; i++) {
     threads[i].thread_id = i;
     threads[i].scheduler = this;
-    threads[i].run_flag = true;
+    threads[i].run_flag.store(true, std::memory_order_relaxed);
     if (i <= (int)one_thread_nodes.size()) {
       threads[i].node = one_thread_nodes[i - 1];
     }
@@ -67,7 +69,7 @@ void Scheduler::execute() {
   }
 
   while (true) {
-    if (tasks_before_sync_node > 0) {
+    if (tasks_before_sync_node.load(std::memory_order_seq_cst) > 0) {
       usleep(250);
       continue;
     }
@@ -79,8 +81,7 @@ void Scheduler::execute() {
   }
 
   for (int i = 1; i <= threads_count; i++) {
-    threads[i].run_flag = false;
-    __sync_synchronize();
+    threads[i].run_flag.store(false, std::memory_order_seq_cst);
     pthread_join(threads[i].pthread_id, nullptr);
   }
 
@@ -101,7 +102,7 @@ bool Scheduler::thread_process_node(Node *node) {
   }
   task->execute();
   delete task;
-  __sync_fetch_and_sub(&tasks_before_sync_node, 1);
+  tasks_before_sync_node.fetch_sub(1, std::memory_order_seq_cst);
   return true;
 }
 
@@ -115,7 +116,7 @@ void Scheduler::thread_execute(ThreadContext *tls) {
     }
     return at_least_one_task_executed;
   };
-  while (tls->run_flag) {
+  while (tls->run_flag.load(std::memory_order_seq_cst)) {
     bool at_least_one_task_executed = false;
     if (tls->node != nullptr) {
       at_least_one_task_executed = process_node(tls->node);
diff --git a/compiler/stage.cpp b/compiler/stage.cpp
index 31db92603a..47a1ba7c44 100644
--- a/compiler/stage.cpp
+++ b/compiler/stage.cpp
@@ -11,6 +11,7 @@
 #include "compiler/data/function-data.h"
 #include "compiler/data/src-file.h"
 #include "compiler/name-gen.h"
+#include "compiler/threading/locks.h"
 #include "compiler/threading/tls.h"
 #include "compiler/utils/string-utils.h"
 
@@ -31,7 +32,7 @@ const char *get_assert_level_desc(AssertLevelT assert_level) {
   }
 }
 
-volatile int ce_locker;
+Mutex ce_locker{};
 
 namespace {
 FILE *warning_file{nullptr};
@@ -44,7 +45,7 @@ void stage::set_warning_file(FILE *file) noexcept {
 void on_compilation_error(const char *description __attribute__((unused)), const char *file_name, int line_number,
                           const char *full_description, AssertLevelT assert_level) {
 
-  AutoLocker<volatile int *> locker(&ce_locker);
+  AutoLocker locker(ce_locker);
   FILE *file = stdout;
   if (assert_level == WRN_ASSERT_LEVEL && warning_file) {
     file = warning_file;
diff --git a/compiler/threading/data-stream.h b/compiler/threading/data-stream.h
index 03e285c184..a1976f5613 100644
--- a/compiler/threading/data-stream.h
+++ b/compiler/threading/data-stream.h
@@ -38,7 +38,7 @@ class DataStream {
 
   void operator<<(DataType input) {
     if (!is_sink_mode_) {
-      __sync_fetch_and_add(&tasks_before_sync_node, 1);
+      tasks_before_sync_node.fetch_add(1, std::memory_order_seq_cst);
     }
     std::lock_guard<std::mutex> lock{mutex_};
     queue_.push_front(std::move(input));
diff --git a/compiler/threading/hash-table.h b/compiler/threading/hash-table.h
index 010be0efe9..2d1d7c74e8 100644
--- a/compiler/threading/hash-table.h
+++ b/compiler/threading/hash-table.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <atomic>
 #include <cassert>
 #include <vector>
 
@@ -13,7 +14,7 @@ template<class T, int N = 1000000>
 class TSHashTable {
 public:
   struct HTNode : Lockable {
-    unsigned long long hash;
+    std::atomic<unsigned long long> hash;
     T data;
 
     HTNode() :
@@ -24,7 +25,7 @@ class TSHashTable {
 
 private:
   HTNode *nodes;
-  int used_size;
+  std::atomic<int> used_size;
 public:
   TSHashTable() :
     nodes(new HTNode[N]),
@@ -34,14 +35,16 @@ class TSHashTable {
   HTNode *at(unsigned long long hash) {
     int i = (unsigned)hash % (unsigned)N;
     while (true) {
-      while (nodes[i].hash != 0 && nodes[i].hash != hash) {
+      while (nodes[i].hash.load(std::memory_order_acquire) != 0 && nodes[i].hash.load(std::memory_order_relaxed) != hash) {
         i++;
         if (i == N) {
           i = 0;
         }
       }
-      if (nodes[i].hash == 0 && !__sync_bool_compare_and_swap(&nodes[i].hash, 0, hash)) {
-        int id = __sync_fetch_and_add(&used_size, 1);
+      unsigned long long expected = 0;
+
+      if (nodes[i].hash.load(std::memory_order_acquire) == expected && !nodes[i].hash.compare_exchange_strong(expected, hash, std::memory_order_acq_rel)) {
+        int id = used_size.fetch_add(1, std::memory_order_relaxed);
         assert(id * 2 < N);
         continue;
       }
@@ -52,21 +55,21 @@ class TSHashTable {
 
   const T *find(unsigned long long hash) {
     int i = (unsigned)hash % (unsigned)N;
-    while (nodes[i].hash != 0 && nodes[i].hash != hash) {
+    while (nodes[i].hash.load(std::memory_order_acquire) != 0 && nodes[i].hash.load(std::memory_order_relaxed) != hash) { // linear probing: stop at an empty slot (0) or at the slot holding this hash
       i++;
       if (i == N) {
         i = 0;
       }
     }
 
-    return nodes[i].hash == hash ? &nodes[i].data : nullptr;
+    return nodes[i].hash.load(std::memory_order_relaxed) == hash ? &nodes[i].data : nullptr; // empty slot => nullptr; the node lock is not taken here
   }
 
   std::vector<T> get_all() {
     std::vector<T> res;
     res.reserve(used_size);
     for (int i = 0; i < N; i++) {
-      if (nodes[i].hash != 0) {
+      if (nodes[i].hash.load(std::memory_order_acquire) != 0) {
         res.push_back(nodes[i].data);
       }
     }
@@ -77,7 +80,7 @@ class TSHashTable {
   std::vector<T> get_all_if(const CondF &callbackF) {
     std::vector<T> res;
     for (int i = 0; i < N; i++) {
-      if (nodes[i].hash != 0 && callbackF(nodes[i].data)) {
+      if (nodes[i].hash.load(std::memory_order_acquire) != 0 && callbackF(nodes[i].data)) {
         res.push_back(nodes[i].data);
       }
     }
diff --git a/compiler/threading/locks.h b/compiler/threading/locks.h
index e90fb041fe..a6c9b5266b 100644
--- a/compiler/threading/locks.h
+++ b/compiler/threading/locks.h
@@ -5,10 +5,70 @@
 #pragma once
 
 #include <cassert>
+#include <sys/syscall.h>
 #include <unistd.h>
 
-template<class T>
-bool try_lock(T);
+#ifndef __APPLE__
+#include <linux/futex.h>
+#endif
+
+#include "common/wrappers/copyable-atomic.h"
+
+// This Mutex is copyable and lock/unlock may be done on different threads
+// std::mutex does not have such properties
+class Mutex { // state_ is one of kFree / kLockedNoWaiters / kLockedWithWaiters, see the constants at the bottom
+ public:
+  void Lock() { // returns only after this thread has acquired the mutex
+#ifdef __APPLE__
+    int old = kFree; // Apple build: no futex path, poll with short sleeps instead
+
+    while (!state_.compare_exchange_strong(old, kLockedWithWaiters)) {
+      usleep(250);
+      old = kFree; // the failed CAS overwrote `old` with the observed state, so reset the expectation
+    }
+#else
+    int old = kFree;
+    if (state_.compare_exchange_strong(old, kLockedNoWaiters)) { // fast path: uncontended acquire
+      return;
+    }
+    if (old != kLockedWithWaiters) {
+      // held but not yet marked as contended: mark it, because this thread is about to wait
+      old = state_.exchange(kLockedWithWaiters);
+    }
+    while (old != kFree) { // a previous value of kFree means the exchange itself acquired the lock
+      syscall(SYS_futex, &state_, FUTEX_WAIT, kLockedWithWaiters, 0, 0, 0); // sleeps only if state_ still equals kLockedWithWaiters
+      old = state_.exchange(kLockedWithWaiters);
+    }
+#endif
+  }
+
+  void Unlock() { // releases the mutex; on the futex path also wakes one waiter if the lock may be contended
+#ifdef __APPLE__
+    state_.store(kFree);
+#else
+    if (state_.fetch_sub(1) == kLockedWithWaiters) { // from kLockedNoWaiters the decrement alone already yields kFree
+      state_.store(kFree); // was marked contended: release fully before waking
+      syscall(SYS_futex, &state_, FUTEX_WAKE, 1, 0, 0, 0); // wake one
+    }
+#endif
+  }
+
+  // lowercase aliases so that Mutex satisfies BasicLockable: https://en.cppreference.com/w/cpp/named_req/BasicLockable
+  void lock() {
+    Lock();
+  }
+
+  void unlock() {
+    Unlock();
+  }
+
+ private:
+  static constexpr int kFree = 0;
+  static constexpr int kLockedNoWaiters = 1;
+  static constexpr int kLockedWithWaiters = 2; // really "may be with waiters"
+  vk::copyable_atomic_integral<int> state_ = kFree; // copyable atomic makes Mutex copyable; a copy starts from the source's current state value
+};
+
 
 template<class T>
 void lock(T locker) {
@@ -20,36 +80,27 @@ void unlock(T locker) {
   locker->unlock();
 }
 
-inline bool try_lock(volatile int *locker) {
-  return __sync_lock_test_and_set(locker, 1) == 0;
+inline void lock(Mutex &m) { // free-function overload taking the Mutex by reference
+  m.Lock(); // blocks until acquired
 }
 
-inline void lock(volatile int *locker) {
-  while (!try_lock(locker)) {
-    usleep(250);
-  }
-}
-
-inline void unlock(volatile int *locker) {
-  assert(*locker == 1);
-  __sync_lock_release(locker);
+inline void unlock(Mutex &m) { // free-function overload taking the Mutex by reference
+  m.Unlock(); // releases the mutex
 }
 
 class Lockable {
 private:
-  volatile int x;
+  Mutex m; // per-object lock state, replaces the former volatile int flag
 public:
-  Lockable() :
-    x(0) {}
-
+  Lockable() = default; // Mutex's default member initializer starts it unlocked
   virtual ~Lockable() = default;
 
   void lock() {
-    ::lock(&x);
+    ::lock(m); // global lock(Mutex &) overload, forwards to Mutex::Lock()
   }
 
   void unlock() {
-    ::unlock(&x);
+    ::unlock(m); // global unlock(Mutex &) overload, forwards to Mutex::Unlock()
   }
 };
 
diff --git a/compiler/threading/tls.h b/compiler/threading/tls.h
index 0b0f2a83f8..6bdbea8364 100644
--- a/compiler/threading/tls.h
+++ b/compiler/threading/tls.h
@@ -25,7 +25,6 @@ struct TLS {
 private:
   struct TLSRaw {
     T data{};
-    volatile int locker = 0;
     char dummy[4096];
   };
 
@@ -69,19 +68,6 @@ struct TLS {
   int size() {
     return MAX_THREADS_COUNT + 1;
   }
-
-  T *lock_get() {
-    TLSRaw *raw = get_raw();
-    bool ok = try_lock(&raw->locker);
-    assert(ok);
-    return &raw->data;
-  }
-
-  void unlock_get(T *ptr) {
-    TLSRaw *raw = get_raw();
-    assert(&raw->data == ptr);
-    unlock(&raw->locker);
-  }
 };
 
 #pragma GCC diagnostic pop
diff --git a/compiler/type-hint.cpp b/compiler/type-hint.cpp
index 4b19b1da1a..e29a3d157f 100644
--- a/compiler/type-hint.cpp
+++ b/compiler/type-hint.cpp
@@ -4,16 +4,16 @@
 
 #include "compiler/type-hint.h"
 
+#include <atomic>
 #include <mutex>
 
 #include "common/php-functions.h"
-
 #include "compiler/data/class-data.h"
-#include "compiler/data/function-data.h"
 #include "compiler/data/ffi-data.h"
+#include "compiler/data/function-data.h"
 #include "compiler/lambda-utils.h"
 #include "compiler/name-gen.h"
-
+#include "compiler/threading/locks.h"
 
 /**
  * This class stores a big hashtable [hash => TypeHint]
@@ -42,8 +42,14 @@ class HasherOfTypeHintForOptimization {
   }
 
   const TypeHint *get_existing() const __attribute__((flatten)) {
-    const auto *result = all_type_hints_ht.find(cur_hash);
-    return result ? *result : nullptr;
+    TSHashTable<const TypeHint *>::HTNode *node = all_type_hints_ht.at(cur_hash); // NOTE(review): unlike find(), at() claims a slot for cur_hash when none exists yet — confirm this side effect is intended
+    AutoLocker<Lockable *> locker(node); // locker is given the node so node->data is read under its lock
+
+    if (node->hash.load(std::memory_order_relaxed) == 0) { // NOTE(review): presumably only reachable when cur_hash itself is 0 — verify
+      return nullptr;
+    }
+
+    return node->data; // presumably still nullptr until a TypeHint is stored for this hash — confirm HTNode's initial data value
   }
 
   const TypeHint *add_because_doesnt_exist(TypeHint *newly_created) const __attribute__((noinline)) {
@@ -73,7 +79,8 @@ TSHashTable<const TypeHint *> HasherOfTypeHintForOptimization::all_type_hints_ht
 
 const TypeData *TypeHint::to_type_data() const {
   kphp_assert(is_typedata_constexpr());
-
+  
+  std::lock_guard lock(mutex_for_cache);
   if (!cached_typedata_if_constexpr) {
     TypeData *dst = TypeData::get_type(tp_any)->clone();
     recalc_type_data_in_context_of_call(dst, {}); // call = {}, as constexpr recalculation will never access it
diff --git a/compiler/type-hint.h b/compiler/type-hint.h
index 77d967a332..03c34f921d 100644
--- a/compiler/type-hint.h
+++ b/compiler/type-hint.h
@@ -12,7 +12,6 @@
 #include "compiler/debug.h"
 #include "compiler/inferring/primitive-type.h"
 
-
 // do not confuse TypeHint with TypeData!
 // TypeData is a part of _type inferring_; it's mutable and plain, it represents current inferred state of every vertex
 class TypeData;
@@ -45,6 +44,7 @@ class TypeHint {
 
   // this field is calculated only once on need, see to_type_data()
   mutable const TypeData *cached_typedata_if_constexpr{nullptr};
+  mutable std::mutex mutex_for_cache;
 
 protected:
   enum flag_mask {