From c1cd95fd0e0460e52b3cfc81985f129ce55b8264 Mon Sep 17 00:00:00 2001
From: momo5502 <mauriceheumann@gmail.com>
Date: Sun, 4 Jan 2026 12:02:27 +0100
Subject: [PATCH 1/3] Use __builtin_sqrt instead of using std library functions

__builtin_sqrt is already used in a few other places in the semantics.
This commit just makes this consistent for all sqrt computations.
---
 lib/Arch/X86/Runtime/Instructions.cpp | 2 +-
 lib/Arch/X86/Semantics/SSE.cpp        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/Arch/X86/Runtime/Instructions.cpp b/lib/Arch/X86/Runtime/Instructions.cpp
index a3b940064..a06b0d66b 100644
--- a/lib/Arch/X86/Runtime/Instructions.cpp
+++ b/lib/Arch/X86/Runtime/Instructions.cpp
@@ -191,7 +191,7 @@ DEF_HELPER(SquareRoot32, float32_t src_float)->float32_t {
       uint32_t indef_qnan = 0xFFC00000U;
       square_root = reinterpret_cast<float32_t &>(indef_qnan);
     } else {
-      square_root = std::sqrt(src_float);
+      square_root = __builtin_sqrtf(src_float);
     }
   }
 
diff --git a/lib/Arch/X86/Semantics/SSE.cpp b/lib/Arch/X86/Semantics/SSE.cpp
index 33fd94699..d70615e3e 100644
--- a/lib/Arch/X86/Semantics/SSE.cpp
+++ b/lib/Arch/X86/Semantics/SSE.cpp
@@ -1794,7 +1794,7 @@ DEF_HELPER(SquareRoot64, float64_t src_float)->float64_t {
       uint64_t indef_qnan = 0xFFF8000000000000ULL;
       square_root = reinterpret_cast<float64_t &>(indef_qnan);
     } else {
-      square_root = std::sqrt(src_float);
+      square_root = __builtin_sqrt(src_float);
     }
   }
 

From 8e5b5208ec444c999513e598714b8104e98b92e7 Mon Sep 17 00:00:00 2001
From: momo5502 <mauriceheumann@gmail.com>
Date: Sun, 4 Jan 2026 12:07:59 +0100
Subject: [PATCH 2/3] Add SQRTPD semantics

---
 lib/Arch/X86/Semantics/SSE.cpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/lib/Arch/X86/Semantics/SSE.cpp b/lib/Arch/X86/Semantics/SSE.cpp
index d70615e3e..f52ef3f25 100644
--- a/lib/Arch/X86/Semantics/SSE.cpp
+++ b/lib/Arch/X86/Semantics/SSE.cpp
@@ -1849,6 +1849,27 @@ IF_AVX(DEF_ISEL(VSQRTSD_XMMdq_XMMdq_XMMq) = VSQRTSD<VV128W, V128, V128>;)
 4297 VSQRTSD VSQRTSD_XMMf64_MASKmskw_XMMf64_MEMf64_AVX512 AVX512 AVX512EVEX AVX512F_SCALAR ATTRIBUTES: DISP8_SCALAR MASKOP_EVEX MEMORY_FAULT_SUPPRESSION MXCSR SIMD_SCALAR
 */
 
+namespace {
+// SQRTPD: writes the square roots of 64-bit float elements 0 and 1 of src1 to dst.
+template <typename D, typename S1>
+DEF_SEM(SQRTPD, D dst, S1 src1) {
+  auto src_vec = FReadV64(src1);
+  // Each element goes through the SquareRoot64 helper rather than a bare sqrt call.
+  auto sqrt_0 = SquareRoot64(memory, state, FExtractV64(src_vec, 0));
+  auto sqrt_1 = SquareRoot64(memory, state, FExtractV64(src_vec, 1));
+  // Start from dst's current value; elements 0 and 1 are then replaced.
+  auto temp_vec = FReadV64(dst);
+  temp_vec = FInsertV64(temp_vec, 0, sqrt_0);
+  temp_vec = FInsertV64(temp_vec, 1, sqrt_1);
+
+  FWriteV64(dst, temp_vec);
+  return memory;
+}
+
+}  // namespace
+
+DEF_ISEL(SQRTPD_XMMpd_MEMpd) = SQRTPD<V128W, MV128>;  // source operand in memory
+DEF_ISEL(SQRTPD_XMMpd_XMMpd) = SQRTPD<V128W, V128>;  // source operand in an XMM register
 
 namespace {
 

From 2f109b659dd78aea27b8cb9379bfeaee4f728c05 Mon Sep 17 00:00:00 2001
From: momo5502 <mauriceheumann@gmail.com>
Date: Sun, 4 Jan 2026 12:58:51 +0100
Subject: [PATCH 3/3] Add SQRTPD tests

---
 tests/X86/SSE/SQRTPD.S | 44 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 tests/X86/SSE/SQRTPD.S

diff --git a/tests/X86/SSE/SQRTPD.S b/tests/X86/SSE/SQRTPD.S
new file mode 100644
index 000000000..ac7a8ae6a
--- /dev/null
+++ b/tests/X86/SSE/SQRTPD.S
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017 Trail of Bits, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#define SQRT_INPUTS_64  /* 64-bit patterns of doubles, handed to TEST_INPUTS by the tests below */ \
+    0x4022000000000000,   /* 9.0, a "perfect square," of which sqrt() is an integer */\
+    0x0000000000000000,   /* 0.0, and sqrt(0) should be 0 */\
+    0x4025C7AE147AE148,   /* 10.89, should return 3.33 */\
+    0x402399999999999A,   /* 9.8, whose square root result needs to be rounded */\
+    0x7FF0000000000001,   /* SNaN, should convert to a QNaN */\
+    0xFFF0000000000001,   /* -SNaN, should convert to a -QNaN */\
+    0x7FF0000000000000,   /* inf */\
+    0xBFF0000000000000,   /* -1.0, should return a constant (the QNaN floating-point indefinite) */\
+    0xFFF0000000000000,   /* -inf */\
+    0x8000000000000000    /* -0.0 */
+
+TEST_BEGIN_64(SQRTPDv128v128, 2)
+TEST_INPUTS(SQRT_INPUTS_64)
+    movq xmm1, ARG1_64;      // load the first double-precision floating point value into xmm1[63:0]
+    movq xmm2, ARG2_64;      // load the second double-precision floating point value into xmm2[63:0]
+    movlhps xmm1, xmm2;      // pack both values: xmm1[127:64] = xmm2[63:0], xmm1[63:0] unchanged
+    sqrtpd xmm0, xmm1;       // find the sqrt of both DP FP values of xmm1, store in xmm0
+TEST_END_64
+
+TEST_BEGIN_64(SQRTPDv128m128, 2)
+TEST_INPUTS(SQRT_INPUTS_64)
+    push ARG2_64;            // push the second double-precision floating point value (high QWORD)
+    push ARG1_64;            // push the first double-precision floating point value (low QWORD)
+    sqrtpd xmm0, xmmword ptr [rsp];  // sqrt of both DP FP values at [rsp], store in xmm0. NOTE(review): non-VEX sqrtpd needs a 16-byte-aligned m128; assumes rsp is aligned here -- confirm
+TEST_END_64
+