diff --git a/src/borrow_tracker/stacked_borrows/mod.rs b/src/borrow_tracker/stacked_borrows/mod.rs
index 488de363bb..76ede552ba 100644
--- a/src/borrow_tracker/stacked_borrows/mod.rs
+++ b/src/borrow_tracker/stacked_borrows/mod.rs
@@ -749,7 +749,7 @@ trait EvalContextPrivExt<'tcx, 'ecx>: crate::MiriInterpCxExt<'tcx> {
                 // Make sure the data race model also knows about this.
                 // FIXME(genmc): Ensure this is still done in GenMC mode. Check for other places where GenMC may need to be informed.
                 if let Some(data_race) = alloc_extra.data_race.as_vclocks_mut() {
-                    data_race.write(
+                    data_race.write_non_atomic(
                         alloc_id,
                         range,
                         NaWriteType::Retag,
@@ -798,7 +798,7 @@ trait EvalContextPrivExt<'tcx, 'ecx>: crate::MiriInterpCxExt<'tcx> {
                 assert_eq!(access, AccessKind::Read);
                 // Make sure the data race model also knows about this.
                 if let Some(data_race) = alloc_extra.data_race.as_vclocks_ref() {
-                    data_race.read(
+                    data_race.read_non_atomic(
                         alloc_id,
                         range,
                         NaReadType::Retag,
diff --git a/src/borrow_tracker/tree_borrows/mod.rs b/src/borrow_tracker/tree_borrows/mod.rs
index 865097af10..5c905fc161 100644
--- a/src/borrow_tracker/tree_borrows/mod.rs
+++ b/src/borrow_tracker/tree_borrows/mod.rs
@@ -366,7 +366,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             // Also inform the data race model (but only if any bytes are actually affected).
             if range_in_alloc.size.bytes() > 0 {
                 if let Some(data_race) = alloc_extra.data_race.as_vclocks_ref() {
-                    data_race.read(
+                    data_race.read_non_atomic(
                         alloc_id,
                         range_in_alloc,
                         NaReadType::Retag,
diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs
index bc1bb93a74..75c1d0f6a7 100644
--- a/src/concurrency/data_race.rs
+++ b/src/concurrency/data_race.rs
@@ -648,18 +648,17 @@ impl MemoryCellClocks {
             thread_clocks.clock.index_mut(index).span = current_span;
         }
         thread_clocks.clock.index_mut(index).set_read_type(read_type);
-        if self.write_was_before(&thread_clocks.clock) {
-            // We must be ordered-after all atomic writes.
-            let race_free = if let Some(atomic) = self.atomic() {
-                atomic.write_vector <= thread_clocks.clock
-            } else {
-                true
-            };
-            self.read.set_at_index(&thread_clocks.clock, index);
-            if race_free { Ok(()) } else { Err(DataRace) }
-        } else {
-            Err(DataRace)
+        // Check synchronization with non-atomic writes.
+        if !self.write_was_before(&thread_clocks.clock) {
+            return Err(DataRace);
         }
+        // Check synchronization with atomic writes.
+        if !self.atomic().is_none_or(|atomic| atomic.write_vector <= thread_clocks.clock) {
+            return Err(DataRace);
+        }
+        // Record this access.
+        self.read.set_at_index(&thread_clocks.clock, index);
+        Ok(())
     }
 
     /// Detect races for non-atomic write operations at the current memory cell
@@ -675,24 +674,23 @@ impl MemoryCellClocks {
         if !current_span.is_dummy() {
             thread_clocks.clock.index_mut(index).span = current_span;
         }
-        if self.write_was_before(&thread_clocks.clock) && self.read <= thread_clocks.clock {
-            let race_free = if let Some(atomic) = self.atomic() {
-                atomic.write_vector <= thread_clocks.clock
-                    && atomic.read_vector <= thread_clocks.clock
-            } else {
-                true
-            };
-            self.write = (index, thread_clocks.clock[index]);
-            self.write_type = write_type;
-            if race_free {
-                self.read.set_zero_vector();
-                Ok(())
-            } else {
-                Err(DataRace)
-            }
-        } else {
-            Err(DataRace)
+        // Check synchronization with non-atomic accesses.
+        if !(self.write_was_before(&thread_clocks.clock) && self.read <= thread_clocks.clock) {
+            return Err(DataRace);
         }
+        // Check synchronization with atomic accesses.
+        if !self.atomic().is_none_or(|atomic| {
+            atomic.write_vector <= thread_clocks.clock && atomic.read_vector <= thread_clocks.clock
+        }) {
+            return Err(DataRace);
+        }
+        // Record this access.
+        self.write = (index, thread_clocks.clock[index]);
+        self.write_type = write_type;
+        self.read.set_zero_vector();
+        // This is not an atomic location any more.
+        self.atomic_ops = None;
+        Ok(())
     }
 }
 
@@ -758,14 +756,9 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
         let this = self.eval_context_mut();
         this.atomic_access_check(dest, AtomicAccessType::Store)?;
 
-        // Read the previous value so we can put it in the store buffer later.
-        // The program didn't actually do a read, so suppress the memory access hooks.
-        // This is also a very special exception where we just ignore an error -- if this read
-        // was UB e.g. because the memory is uninitialized, we don't want to know!
-        let old_val = this.run_for_validation_ref(|this| this.read_scalar(dest)).discard_err();
-
         // Inform GenMC about the atomic store.
         if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
+            let old_val = this.run_for_validation_ref(|this| this.read_scalar(dest)).discard_err();
             if genmc_ctx.atomic_store(
                 this,
                 dest.ptr().addr(),
@@ -780,6 +773,9 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
             }
             return interp_ok(());
         }
+
+        // Read the previous value so we can put it in the store buffer later.
+        let old_val = this.get_latest_nonatomic_val(dest);
         this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?;
         this.validate_atomic_store(dest, atomic)?;
         this.buffered_atomic_write(val, dest, atomic, old_val)
@@ -1201,7 +1197,7 @@ impl VClockAlloc {
     /// operation for which data-race detection is handled separately, for example
    /// atomic read operations. The `ty` parameter is used for diagnostics, letting
    /// the user know which type was read.
-    pub fn read<'tcx>(
+    pub fn read_non_atomic<'tcx>(
         &self,
         alloc_id: AllocId,
         access_range: AllocRange,
@@ -1243,7 +1239,7 @@ impl VClockAlloc {
     /// being created or if it is temporarily disabled during a racy read or write
    /// operation. The `ty` parameter is used for diagnostics, letting
    /// the user know which type was written.
-    pub fn write<'tcx>(
+    pub fn write_non_atomic<'tcx>(
         &mut self,
         alloc_id: AllocId,
         access_range: AllocRange,
@@ -1540,6 +1536,35 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
         )
     }
 
+    /// Returns the most recent *non-atomic* value stored in the given place.
+    /// Errors if we don't need that (because we don't do store buffering) or if
+    /// the most recent value is in fact atomic.
+    fn get_latest_nonatomic_val(&self, place: &MPlaceTy<'tcx>) -> Result<Option<Scalar>, ()> {
+        let this = self.eval_context_ref();
+        // These cannot fail because `atomic_access_check` was done first.
+        let (alloc_id, offset, _prov) = this.ptr_get_alloc_id(place.ptr(), 0).unwrap();
+        let alloc_meta = &this.get_alloc_extra(alloc_id).unwrap().data_race;
+        if alloc_meta.as_weak_memory_ref().is_none() {
+            // No reason to read old value if we don't track store buffers.
+            return Err(());
+        }
+        let data_race = alloc_meta.as_vclocks_ref().unwrap();
+        // Only read old value if this is currently a non-atomic location.
+        for (_range, clocks) in data_race.alloc_ranges.borrow_mut().iter(offset, place.layout.size)
+        {
+            // If this had an atomic write that's not before the non-atomic write, that should
+            // already be in the store buffer. Initializing the store buffer now would use the
+            // wrong `sync_clock` so we better make sure that does not happen.
+            if clocks.atomic().is_some_and(|atomic| !(atomic.write_vector <= clocks.write())) {
+                return Err(());
+            }
+        }
+        // The program didn't actually do a read, so suppress the memory access hooks.
+        // This is also a very special exception where we just ignore an error -- if this read
+        // was UB e.g. because the memory is uninitialized, we don't want to know!
+        Ok(this.run_for_validation_ref(|this| this.read_scalar(place)).discard_err())
+    }
+
     /// Generic atomic operation implementation
     fn validate_atomic_op(
         &self,
diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs
index 6ebade6a56..2255e0d481 100644
--- a/src/concurrency/weak_memory.rs
+++ b/src/concurrency/weak_memory.rs
@@ -177,11 +177,11 @@ impl StoreBufferAlloc {
         Self { store_buffers: RefCell::new(RangeObjectMap::new()) }
     }
 
-    /// When a non-atomic access happens on a location that has been atomically accessed
-    /// before without data race, we can determine that the non-atomic access fully happens
+    /// When a non-atomic write happens on a location that has been atomically accessed
+    /// before without data race, we can determine that the non-atomic write fully happens
     /// after all the prior atomic writes so the location no longer needs to exhibit
-    /// any weak memory behaviours until further atomic accesses.
-    pub fn memory_accessed(&self, range: AllocRange, global: &DataRaceState) {
+    /// any weak memory behaviours until further atomic writes.
+    pub fn non_atomic_write(&self, range: AllocRange, global: &DataRaceState) {
         if !global.ongoing_action_data_race_free() {
             let mut buffers = self.store_buffers.borrow_mut();
             let access_type = buffers.access_type(range);
@@ -223,18 +223,23 @@ impl StoreBufferAlloc {
     fn get_or_create_store_buffer_mut<'tcx>(
         &mut self,
         range: AllocRange,
-        init: Option<Scalar>,
+        init: Result<Option<Scalar>, ()>,
     ) -> InterpResult<'tcx, &mut StoreBuffer> {
         let buffers = self.store_buffers.get_mut();
         let access_type = buffers.access_type(range);
         let pos = match access_type {
             AccessType::PerfectlyOverlapping(pos) => pos,
             AccessType::Empty(pos) => {
+                let init =
+                    init.expect("cannot have empty store buffer when previous write was atomic");
                 buffers.insert_at_pos(pos, range, StoreBuffer::new(init));
                 pos
             }
             AccessType::ImperfectlyOverlapping(pos_range) => {
                 // Once we reach here we would've already checked that this access is not racy.
+                let init = init.expect(
+                    "cannot have partially overlapping store buffer when previous write was atomic",
+                );
                 buffers.remove_pos_range(pos_range.clone());
                 buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init));
                 pos_range.start
@@ -490,7 +495,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         }
         let range = alloc_range(base_offset, place.layout.size);
         let sync_clock = data_race_clocks.sync_clock(range);
-        let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, Some(init))?;
+        let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, Ok(Some(init)))?;
         // The RMW always reads from the most recent store.
         buffer.read_from_last_store(global, threads, atomic == AtomicRwOrd::SeqCst);
         buffer.buffered_write(
@@ -556,7 +561,8 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
     /// Add the given write to the store buffer. (Does not change machine memory.)
     ///
     /// `init` says with which value to initialize the store buffer in case there wasn't a store
-    /// buffer for this memory range before.
+    /// buffer for this memory range before. `Err(())` means the value is not available;
+    /// `Ok(None)` means the memory does not contain a valid scalar.
     ///
     /// Must be called *after* `validate_atomic_store` to ensure that `sync_clock` is up-to-date.
     fn buffered_atomic_write(
@@ -564,7 +570,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         val: Scalar,
         dest: &MPlaceTy<'tcx>,
         atomic: AtomicWriteOrd,
-        init: Option<Scalar>,
+        init: Result<Option<Scalar>, ()>,
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
         let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr(), 0)?;
diff --git a/src/machine.rs b/src/machine.rs
index 9c6eafbd74..33f854bee2 100644
--- a/src/machine.rs
+++ b/src/machine.rs
@@ -1536,14 +1536,11 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
                 genmc_ctx.memory_load(machine, ptr.addr(), range.size)?,
             GlobalDataRaceHandler::Vclocks(_data_race) => {
                 let _trace = enter_trace_span!(data_race::before_memory_read);
-                let AllocDataRaceHandler::Vclocks(data_race, weak_memory) = &alloc_extra.data_race
+                let AllocDataRaceHandler::Vclocks(data_race, _weak_memory) = &alloc_extra.data_race
                 else {
                     unreachable!();
                 };
-                data_race.read(alloc_id, range, NaReadType::Read, None, machine)?;
-                if let Some(weak_memory) = weak_memory {
-                    weak_memory.memory_accessed(range, machine.data_race.as_vclocks_ref().unwrap());
-                }
+                data_race.read_non_atomic(alloc_id, range, NaReadType::Read, None, machine)?;
             }
         }
         if let Some(borrow_tracker) = &alloc_extra.borrow_tracker {
@@ -1579,9 +1576,10 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
                 else {
                     unreachable!()
                 };
-                data_race.write(alloc_id, range, NaWriteType::Write, None, machine)?;
+                data_race.write_non_atomic(alloc_id, range, NaWriteType::Write, None, machine)?;
                 if let Some(weak_memory) = weak_memory {
-                    weak_memory.memory_accessed(range, machine.data_race.as_vclocks_ref().unwrap());
+                    weak_memory
+                        .non_atomic_write(range, machine.data_race.as_vclocks_ref().unwrap());
                 }
             }
         }
@@ -1612,7 +1610,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
             GlobalDataRaceHandler::Vclocks(_global_state) => {
                 let _trace = enter_trace_span!(data_race::before_memory_deallocation);
                 let data_race = alloc_extra.data_race.as_vclocks_mut().unwrap();
-                data_race.write(
+                data_race.write_non_atomic(
                     alloc_id,
                     alloc_range(Size::ZERO, size),
                     NaWriteType::Deallocate,
diff --git a/tests/pass/0weak_memory/weak.rs b/tests/pass/0weak_memory/weak.rs
index e329bbff1a..8d3c851861 100644
--- a/tests/pass/0weak_memory/weak.rs
+++ b/tests/pass/0weak_memory/weak.rs
@@ -88,10 +88,20 @@ fn initialization_write(add_fence: bool) {
     check_all_outcomes([11, 22], || {
         let x = static_atomic(11);
 
+        if add_fence {
+            // For the fun of it, let's make this location atomic and non-atomic again,
+            // to ensure Miri updates the state properly for that.
+            x.store(99, Relaxed);
+            unsafe { std::ptr::from_ref(x).cast_mut().write(11.into()) };
+        }
+
         let wait = static_atomic(0);
 
         let j1 = spawn(move || {
             x.store(22, Relaxed);
+            // Since nobody else writes to `x`, we can non-atomically read it.
+            // (This tests that we do not delete the store buffer here.)
+            unsafe { std::ptr::from_ref(x).read() };
             // Relaxed is intentional. We want to test if the thread 2 reads the initialisation write
             // after a relaxed write
             wait.store(1, Relaxed);
diff --git a/tests/pass/concurrency/issue1643.rs b/tests/pass/concurrency/issue-miri-1643.rs
similarity index 100%
rename from tests/pass/concurrency/issue1643.rs
rename to tests/pass/concurrency/issue-miri-1643.rs
diff --git a/tests/pass/concurrency/issue-miri-4655-mix-atomic-nonatomic.rs b/tests/pass/concurrency/issue-miri-4655-mix-atomic-nonatomic.rs
new file mode 100644
index 0000000000..fd9d3a349d
--- /dev/null
+++ b/tests/pass/concurrency/issue-miri-4655-mix-atomic-nonatomic.rs
@@ -0,0 +1,42 @@
+//! This reproduces #4655 every single time
+//@ compile-flags: -Zmiri-fixed-schedule -Zmiri-disable-stacked-borrows
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::{ptr, thread};
+
+const SIZE: usize = 256;
+
+static mut ARRAY: [u8; SIZE] = [0; _];
+// Everything strictly less than this has been initialized by the sender.
+static POS: AtomicUsize = AtomicUsize::new(0);
+
+fn main() {
+    // Sender
+    let th = std::thread::spawn(|| {
+        for i in 0..SIZE {
+            unsafe { ptr::write(&raw mut ARRAY[i], 1) };
+            POS.store(i + 1, Ordering::Release);
+
+            thread::yield_now();
+
+            // We are the only writer, so we can do non-atomic reads as well.
+            unsafe { (&raw const POS).cast::<usize>().read() };
+        }
+    });
+
+    // Receiver
+    loop {
+        let i = POS.load(Ordering::Acquire);
+
+        if i > 0 {
+            unsafe { ptr::read(&raw const ARRAY[i - 1]) };
+        }
+
+        if i == SIZE {
+            break;
+        }
+
+        thread::yield_now();
+    }
+
+    th.join().unwrap();
+}