Skip to content

[GR-52711] Implement GraalWasm SIMD proposal using the Vector API. #11630

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Jul 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
c43b7ee
Initial GraalWasm SIMD implementation using Vector API
jirkamarsik Apr 28, 2025
598f418
Use the module path for GraalWasm tasks
jirkamarsik May 22, 2025
c6b6286
Build native image for GraalWasm with Vector API
jirkamarsik May 22, 2025
e24704a
Precompute more caches in VectorAPIFeature
jirkamarsik May 26, 2025
4fec35c
Add GraalWasm microbenchmark for f64x2.nearest and f64x2.floor
jirkamarsik Jun 2, 2025
7987d3d
Add TruffleBoundaries to Vector API implementation
jirkamarsik Jun 3, 2025
d61f9aa
Add AssertionError to skippedExceptionTypes
jirkamarsik Jun 17, 2025
20d522f
Add GraalWasm SIMD benchmarks to polybench
jirkamarsik Jun 19, 2025
ae8649a
Only build error messages when needed
woess Jun 22, 2025
df0c417
Vector API: Preinitialize ELEMENT_LAYOUT VarHandle used by memorySegm…
woess Jun 22, 2025
ec0ec65
Substitute memorySegmentGet/Set to trigger VarHandle inlining before …
woess Jun 22, 2025
a64b806
Add missing tflsm component to ce-nodejs
jirkamarsik Jul 11, 2025
52128db
Avoid calling vector constructors during Vector128Ops clinit
jirkamarsik Jul 15, 2025
f57e2f2
Fix broken assertions in VectorAPIFeature
jirkamarsik Jul 16, 2025
51addd3
Move Vector API SIMD implementation to separate project
jirkamarsik Jul 17, 2025
1808790
Do not set Vector API specific options for libwasmvm on pre-JDK25 Gra…
jirkamarsik Jul 19, 2025
dc0bb08
Allow VectorMathLibrary#lookup in GraalWasm's Truffle Native TCK
jirkamarsik Jul 19, 2025
0373c0d
Fix deopt of 0-length virtual array in presence of VectorAPI
jirkamarsik Jul 22, 2025
eb23711
MethodHandle substitutions refactoring
jirkamarsik Jul 22, 2025
a777f55
Use -JUnitOpenPackages to open GraalWasm for unit tests
jirkamarsik Jul 22, 2025
3b0939a
Refactoring VectorAPIFeature
jirkamarsik Jul 22, 2025
32078dc
Use tempTargetAllowList instead of substitutions for Vector API
jirkamarsik Jul 23, 2025
f3b3fd2
Do not intrinsify Integer/Long.reverse in web-image
jirkamarsik Jul 23, 2025
0d10d32
Run SIMD microbenchmarks from WatBenchmarkSuite in CI
jirkamarsik Jul 24, 2025
369f139
Eagerly initialize Vector API var handles
woess Jul 24, 2025
46844ea
Add casts before reinterpreting SIMD operands
woess Jul 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ public class KnownTruffleTypes extends AbstractKnownTruffleTypes {
public final ResolvedJavaType BufferOverflowException = lookupType(BufferOverflowException.class);
public final ResolvedJavaType ReadOnlyBufferException = lookupType(ReadOnlyBufferException.class);
public final ResolvedJavaType ScopedMemoryAccess_ScopedAccessError = lookupTypeOptional("jdk.internal.misc.ScopedMemoryAccess$ScopedAccessError");
public final ResolvedJavaType AssertionError = lookupType(AssertionError.class);
public final ResolvedJavaType AbstractMemorySegmentImpl = lookupTypeOptional("jdk.internal.foreign.AbstractMemorySegmentImpl");
public final ResolvedJavaType MemorySegmentProxy = lookupTypeOptional("jdk.internal.access.foreign.MemorySegmentProxy");

Expand Down Expand Up @@ -245,6 +246,7 @@ private ResolvedJavaType[] createSkippedExceptionTypes() {
types.add(BufferUnderflowException);
types.add(BufferOverflowException);
types.add(ReadOnlyBufferException);
types.add(AssertionError);
return types.toArray(ResolvedJavaType[]::new);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,9 @@ private Object materializeObject(int virtualObjectId, FrameInfoQueryResult sourc

if (ImageSingletons.contains(VectorAPIDeoptimizationSupport.class)) {
VectorAPIDeoptimizationSupport deoptSupport = ImageSingletons.lookup(VectorAPIDeoptimizationSupport.class);
Object payloadArray = deoptSupport.materializePayload(this, hub, encodings[curIdx], sourceFrame);
if (payloadArray != null) {
VectorAPIDeoptimizationSupport.PayloadLayout payloadLayout = deoptSupport.getLayout(DynamicHub.toClass(hub));
if (payloadLayout != null) {
Object payloadArray = deoptSupport.materializePayload(this, payloadLayout, encodings[curIdx], sourceFrame);
JavaConstant arrayConstant = SubstrateObjectConstant.forObject(payloadArray, ReferenceAccess.singleton().haveCompressedReferences());
Deoptimizer.writeValueInMaterializedObj(obj, curOffset, arrayConstant, sourceFrame);
return obj;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import com.oracle.svm.core.code.FrameInfoQueryResult;
import com.oracle.svm.core.config.ConfigurationValues;
import com.oracle.svm.core.config.ObjectLayout;
import com.oracle.svm.core.hub.DynamicHub;

import jdk.graal.compiler.word.Word;
import jdk.vm.ci.meta.JavaConstant;
Expand All @@ -45,41 +44,36 @@
public class VectorAPIDeoptimizationSupport {

/**
* If the {@code hub} refers to a Vector API vector, materialize its payload array. That is,
* allocate a primitive array of the appropriate element type and length for the Vector API
* value. Read the vector's entries from the stack and store them in the array.
* Materialize the payload array of a Vector API class. That is, allocate a primitive array of
* the appropriate element type and length for the Vector API value. Read the vector's entries
* from the stack and store them in the array.
*
* @param deoptState state for accessing values on the stack
* @param hub the hub of the object to be materialized
* @param layout non-null payload layout from {@link #getLayout}
* @param vectorEncoding describes the location of the vector on the stack
* @param sourceFrame the source frame containing the vector
* @return a materialized primitive array if the object to be materialized is a Vector API
* vector; {@code null} otherwise
*/
public Object materializePayload(DeoptState deoptState, DynamicHub hub, FrameInfoQueryResult.ValueInfo vectorEncoding, FrameInfoQueryResult sourceFrame) {
Class<?> vectorClass = DynamicHub.toClass(hub);
PayloadLayout layout = typeMap.get(vectorClass);
if (layout != null) {
/*
* Read values from the stack and write them to an array of the same element type. Note
* that vector masks in states are already represented as vectors of byte-sized 0 or 1
* values, this is ensured by the VectorAPIExpansionPhase. Therefore, this code does not
* need to worry about the target's representation of vector masks; an element type of
* boolean in the layout will allow us to handle masks correctly.
*/
JavaKind elementKind = JavaKind.fromJavaClass(layout.elementType);
Object array = Array.newInstance(layout.elementType, layout.vectorLength);
ObjectLayout objectLayout = ConfigurationValues.getObjectLayout();
UnsignedWord curOffset = Word.unsigned(objectLayout.getArrayBaseOffset(elementKind));
for (int i = 0; i < layout.vectorLength; i++) {
FrameInfoQueryResult.ValueInfo elementEncoding = vectorEncoding.copyForElement(elementKind, i * elementKind.getByteCount());
JavaConstant con = readValue(deoptState, elementEncoding, sourceFrame);
writeValueInMaterializedObj(array, curOffset, con, sourceFrame);
curOffset = curOffset.add(objectLayout.sizeInBytes(elementKind));
}
return array;
public Object materializePayload(DeoptState deoptState, PayloadLayout layout, FrameInfoQueryResult.ValueInfo vectorEncoding, FrameInfoQueryResult sourceFrame) {
/*
* Read values from the stack and write them to an array of the same element type. Note that
* vector masks in states are already represented as vectors of byte-sized 0 or 1 values,
* this is ensured by the VectorAPIExpansionPhase. Therefore, this code does not need to
* worry about the target's representation of vector masks; an element type of boolean in
* the layout will allow us to handle masks correctly.
*/
JavaKind elementKind = JavaKind.fromJavaClass(layout.elementType);
Object array = Array.newInstance(layout.elementType, layout.vectorLength);
ObjectLayout objectLayout = ConfigurationValues.getObjectLayout();
UnsignedWord curOffset = Word.unsigned(objectLayout.getArrayBaseOffset(elementKind));
for (int i = 0; i < layout.vectorLength; i++) {
FrameInfoQueryResult.ValueInfo elementEncoding = vectorEncoding.copyForElement(elementKind, i * elementKind.getByteCount());
JavaConstant con = readValue(deoptState, elementEncoding, sourceFrame);
writeValueInMaterializedObj(array, curOffset, con, sourceFrame);
curOffset = curOffset.add(objectLayout.sizeInBytes(elementKind));
}
return null;
return array;
}

protected static JavaConstant readValue(DeoptState deoptState, FrameInfoQueryResult.ValueInfo valueInfo, FrameInfoQueryResult sourceFrame) {
Expand Down Expand Up @@ -108,4 +102,8 @@ public record PayloadLayout(Class<?> elementType, int vectorLength) {
/**
 * Records {@code layout} as the payload layout of {@code vectorClass} in {@code typeMap}, the
 * same map that {@link #getLayout} reads from.
 *
 * @param vectorClass the class used as the map key
 * @param layout the element type and vector length to associate with {@code vectorClass}
 */
public void putLayout(Class<?> vectorClass, PayloadLayout layout) {
typeMap.put(vectorClass, layout);
}

/**
 * Looks up the payload layout stored in {@code typeMap} for {@code vectorClass}.
 *
 * @param vectorClass the class to look up
 * @return whatever {@code typeMap} holds for {@code vectorClass}; callers null-check the result
 *         before handing it to {@link #materializePayload} (presumably {@code null} means no
 *         layout was registered via {@link #putLayout} -- confirm against the map type)
 */
public PayloadLayout getLayout(Class<?> vectorClass) {
    PayloadLayout registeredLayout = typeMap.get(vectorClass);
    return registeredLayout;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
*/
package com.oracle.svm.core.jdk;

import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.util.function.IntFunction;
import java.util.stream.Collectors;

import org.graalvm.nativeimage.ImageSingletons;
Expand Down Expand Up @@ -109,6 +112,48 @@ private static final class Target_jdk_incubator_vector_VectorOperators_Conversio
@AlwaysInline("Vector API performance")
private static native Target_jdk_incubator_vector_VectorOperators_ConversionImpl<?, ?> ofReinterpret(Target_jdk_incubator_vector_LaneType dom, Target_jdk_incubator_vector_LaneType ran);
}

/*
 * Member-less alias of VectorOperators.Operator. It exists so that
 * Target_jdk_incubator_vector_VectorOperators_ImplCache can use it as the bound of its OP type
 * parameter.
 */
@TargetClass(className = "jdk.incubator.vector.VectorOperators", innerClass = "Operator", onlyWith = VectorAPIEnabled.class)
interface Target_jdk_incubator_vector_VectorOperators_Operator {
}

@TargetClass(className = "jdk.incubator.vector.VectorOperators", innerClass = "ImplCache", onlyWith = VectorAPIEnabled.class)
static final class Target_jdk_incubator_vector_VectorOperators_ImplCache<OP extends Target_jdk_incubator_vector_VectorOperators_Operator, T> {

    /* Alias of the cache's backing table; find() indexes it by opcode. */
    @Alias Object[] cache;

    /*
     * Replacement for ImplCache#find whose purpose is to get rid of the call to
     * isNonCapturingLambda. Since the cache is precomputed, the lazy cache initialization is
     * dropped as well and the lookup collapses to a single array read; `op` and `supplier`
     * are intentionally unused.
     */
    @Substitute
    @AlwaysInline("Vector API fast-path")
    @SuppressWarnings({"unchecked", "unused"})
    public T find(OP op, int opc, IntFunction<T> supplier) {
        return (T) cache[opc];
    }
}
}

/*
 * Member-less alias of jdk.incubator.vector.AbstractVector. It gives
 * Target_jdk_incubator_vector_AbstractSpecies a type for its dummyVector field and getter.
 */
@TargetClass(className = "jdk.incubator.vector.AbstractVector", onlyWith = VectorAPIEnabled.class)
final class Target_jdk_incubator_vector_AbstractVector {
}

@TargetClass(className = "jdk.incubator.vector.AbstractSpecies", onlyWith = VectorAPIEnabled.class)
final class Target_jdk_incubator_vector_AbstractSpecies {

    /* Alias of the species' dummy vector; VectorAPIFeature fills it in at image build time. */
    @Alias private Target_jdk_incubator_vector_AbstractVector dummyVector;

    /*
     * The `dummyVector` fields are initialized by VectorAPIFeature while the image is being
     * built, so the getter can simply hand back the precomputed dummy vector.
     */
    @Substitute
    Target_jdk_incubator_vector_AbstractVector dummyVector() {
        return this.dummyVector;
    }
}

@TargetClass(className = "jdk.incubator.vector.ByteVector", onlyWith = VectorAPIEnabled.class)
Expand All @@ -119,6 +164,20 @@ final class Target_jdk_incubator_vector_ByteVector {
@Alias @RecomputeFieldValue(kind = RecomputeFieldValue.Kind.ArrayBaseOffset, declClass = byte[].class, isFinal = true) //
@TargetElement(name = "ARRAY_BASE") //
private static long arrayBase;

@Alias @RecomputeFieldValue(isFinal = true, kind = RecomputeFieldValue.Kind.None) //
@TargetElement(name = "ELEMENT_LAYOUT") //
static ValueLayout.OfByte elementLayout;

/*
 * Writes the byte lane value `e` into `ms` through the var handle of the aliased
 * ELEMENT_LAYOUT. The target address is the base offset `o` plus `i` lanes of Byte.BYTES
 * (1) byte each; the lane index is widened to long before scaling.
 */
@Substitute
static void memorySegmentSet(MemorySegment ms, long o, int i, byte e) {
    long byteOffset = o + (long) i * Byte.BYTES;
    elementLayout.varHandle().set(ms, byteOffset, e);
}

/*
 * Reads one byte lane from `ms` through the var handle of the aliased ELEMENT_LAYOUT. The
 * source address is the base offset `o` plus `i` lanes of Byte.BYTES (1) byte each. The
 * cast stays directly on the var handle call because it fixes the call's return type.
 */
@Substitute
static byte memorySegmentGet(MemorySegment ms, long o, int i) {
    long byteOffset = o + (long) i * Byte.BYTES;
    return (byte) elementLayout.varHandle().get(ms, byteOffset);
}
}

@TargetClass(className = "jdk.incubator.vector.ShortVector", onlyWith = VectorAPIEnabled.class)
Expand All @@ -129,6 +188,20 @@ final class Target_jdk_incubator_vector_ShortVector {
@Alias @RecomputeFieldValue(kind = RecomputeFieldValue.Kind.ArrayBaseOffset, declClass = short[].class, isFinal = true) //
@TargetElement(name = "ARRAY_BASE") //
private static long arrayBase;

@Alias @RecomputeFieldValue(isFinal = true, kind = RecomputeFieldValue.Kind.None) //
@TargetElement(name = "ELEMENT_LAYOUT") //
static ValueLayout.OfShort elementLayout;

/*
 * Writes the short lane value `e` into `ms` through the var handle of the aliased
 * ELEMENT_LAYOUT. The target address is the base offset `o` plus `i` lanes of Short.BYTES
 * (2) bytes each; the lane index is widened to long before scaling.
 */
@Substitute
static void memorySegmentSet(MemorySegment ms, long o, int i, short e) {
    long byteOffset = o + (long) i * Short.BYTES;
    elementLayout.varHandle().set(ms, byteOffset, e);
}

/*
 * Reads one short lane from `ms` through the var handle of the aliased ELEMENT_LAYOUT. The
 * source address is the base offset `o` plus `i` lanes of Short.BYTES (2) bytes each. The
 * cast stays directly on the var handle call because it fixes the call's return type.
 */
@Substitute
static short memorySegmentGet(MemorySegment ms, long o, int i) {
    long byteOffset = o + (long) i * Short.BYTES;
    return (short) elementLayout.varHandle().get(ms, byteOffset);
}
}

@TargetClass(className = "jdk.incubator.vector.IntVector", onlyWith = VectorAPIEnabled.class)
Expand All @@ -139,6 +212,20 @@ final class Target_jdk_incubator_vector_IntVector {
@Alias @RecomputeFieldValue(kind = RecomputeFieldValue.Kind.ArrayBaseOffset, declClass = int[].class, isFinal = true) //
@TargetElement(name = "ARRAY_BASE") //
private static long arrayBase;

@Alias @RecomputeFieldValue(isFinal = true, kind = RecomputeFieldValue.Kind.None) //
@TargetElement(name = "ELEMENT_LAYOUT") //
static ValueLayout.OfInt elementLayout;

/*
 * Writes the int lane value `e` into `ms` through the var handle of the aliased
 * ELEMENT_LAYOUT. The target address is the base offset `o` plus `i` lanes of Integer.BYTES
 * (4) bytes each; the lane index is widened to long before scaling.
 */
@Substitute
static void memorySegmentSet(MemorySegment ms, long o, int i, int e) {
    long byteOffset = o + (long) i * Integer.BYTES;
    elementLayout.varHandle().set(ms, byteOffset, e);
}

/*
 * Reads one int lane from `ms` through the var handle of the aliased ELEMENT_LAYOUT. The
 * source address is the base offset `o` plus `i` lanes of Integer.BYTES (4) bytes each. The
 * cast stays directly on the var handle call because it fixes the call's return type.
 */
@Substitute
static int memorySegmentGet(MemorySegment ms, long o, int i) {
    long byteOffset = o + (long) i * Integer.BYTES;
    return (int) elementLayout.varHandle().get(ms, byteOffset);
}
}

@TargetClass(className = "jdk.incubator.vector.LongVector", onlyWith = VectorAPIEnabled.class)
Expand All @@ -149,6 +236,20 @@ final class Target_jdk_incubator_vector_LongVector {
@Alias @RecomputeFieldValue(kind = RecomputeFieldValue.Kind.ArrayBaseOffset, declClass = long[].class, isFinal = true) //
@TargetElement(name = "ARRAY_BASE") //
private static long arrayBase;

@Alias @RecomputeFieldValue(isFinal = true, kind = RecomputeFieldValue.Kind.None) //
@TargetElement(name = "ELEMENT_LAYOUT") //
static ValueLayout.OfLong elementLayout;

/*
 * Writes the long lane value `e` into `ms` through the var handle of the aliased
 * ELEMENT_LAYOUT. The target address is the base offset `o` plus `i` lanes of Long.BYTES
 * (8) bytes each; the lane index is widened to long before scaling.
 */
@Substitute
static void memorySegmentSet(MemorySegment ms, long o, int i, long e) {
    long byteOffset = o + (long) i * Long.BYTES;
    elementLayout.varHandle().set(ms, byteOffset, e);
}

/*
 * Reads one long lane from `ms` through the var handle of the aliased ELEMENT_LAYOUT. The
 * source address is the base offset `o` plus `i` lanes of Long.BYTES (8) bytes each. The
 * cast stays directly on the var handle call because it fixes the call's return type.
 */
@Substitute
static long memorySegmentGet(MemorySegment ms, long o, int i) {
    long byteOffset = o + (long) i * Long.BYTES;
    return (long) elementLayout.varHandle().get(ms, byteOffset);
}
}

@TargetClass(className = "jdk.incubator.vector.FloatVector", onlyWith = VectorAPIEnabled.class)
Expand All @@ -159,6 +260,20 @@ final class Target_jdk_incubator_vector_FloatVector {
@Alias @RecomputeFieldValue(kind = RecomputeFieldValue.Kind.ArrayBaseOffset, declClass = float[].class, isFinal = true) //
@TargetElement(name = "ARRAY_BASE") //
private static long arrayBase;

@Alias @RecomputeFieldValue(isFinal = true, kind = RecomputeFieldValue.Kind.None) //
@TargetElement(name = "ELEMENT_LAYOUT") //
static ValueLayout.OfFloat elementLayout;

/*
 * Writes the float lane value `e` into `ms` through the var handle of the aliased
 * ELEMENT_LAYOUT. The target address is the base offset `o` plus `i` lanes of Float.BYTES
 * (4) bytes each; the lane index is widened to long before scaling.
 */
@Substitute
static void memorySegmentSet(MemorySegment ms, long o, int i, float e) {
    long byteOffset = o + (long) i * Float.BYTES;
    elementLayout.varHandle().set(ms, byteOffset, e);
}

/*
 * Reads one float lane from `ms` through the var handle of the aliased ELEMENT_LAYOUT. The
 * source address is the base offset `o` plus `i` lanes of Float.BYTES (4) bytes each. The
 * cast stays directly on the var handle call because it fixes the call's return type.
 */
@Substitute
static float memorySegmentGet(MemorySegment ms, long o, int i) {
    long byteOffset = o + (long) i * Float.BYTES;
    return (float) elementLayout.varHandle().get(ms, byteOffset);
}
}

@TargetClass(className = "jdk.incubator.vector.DoubleVector", onlyWith = VectorAPIEnabled.class)
Expand All @@ -169,4 +284,18 @@ final class Target_jdk_incubator_vector_DoubleVector {
@Alias @RecomputeFieldValue(kind = RecomputeFieldValue.Kind.ArrayBaseOffset, declClass = double[].class, isFinal = true) //
@TargetElement(name = "ARRAY_BASE") //
private static long arrayBase;

@Alias @RecomputeFieldValue(isFinal = true, kind = RecomputeFieldValue.Kind.None) //
@TargetElement(name = "ELEMENT_LAYOUT") //
static ValueLayout.OfDouble elementLayout;

/*
 * Writes the double lane value `e` into `ms` through the var handle of the aliased
 * ELEMENT_LAYOUT. The target address is the base offset `o` plus `i` lanes of Double.BYTES
 * (8) bytes each; the lane index is widened to long before scaling.
 */
@Substitute
static void memorySegmentSet(MemorySegment ms, long o, int i, double e) {
    long byteOffset = o + (long) i * Double.BYTES;
    elementLayout.varHandle().set(ms, byteOffset, e);
}

/*
 * Reads one double lane from `ms` through the var handle of the aliased ELEMENT_LAYOUT. The
 * source address is the base offset `o` plus `i` lanes of Double.BYTES (8) bytes each. The
 * cast stays directly on the var handle call because it fixes the call's return type.
 */
@Substitute
static double memorySegmentGet(MemorySegment ms, long o, int i) {
    long byteOffset = o + (long) i * Double.BYTES;
    return (double) elementLayout.varHandle().get(ms, byteOffset);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,11 @@ private static SubstrateConstructorAccessor getConstructorAccessor(Constructor<?
}

/**
 * Verifies that {@code member} is static exactly when {@code isStatic} is {@code true}.
 *
 * The check is performed once and the error message is assembled only on the failure path, so
 * a successful call does no string formatting or concatenation.
 *
 * @param member the reflective member whose modifiers are inspected
 * @param isStatic {@code true} if a static operation is about to be performed on {@code member}
 * @throws RuntimeException the error produced by {@code VMError.shouldNotReachHere} when the
 *             member's static-ness does not match {@code isStatic}
 */
private static <T extends AccessibleObject & Member> void checkMember(T member, boolean isStatic) {
    if (Modifier.isStatic(member.getModifiers()) == isStatic) {
        return;
    }
    throw VMError.shouldNotReachHere("Cannot perform " +
                    (isStatic ? "static" : "non-static") + " operation on a " +
                    (isStatic ? "non-static" : "static") + " member");
}

private static SubstrateAccessor getAccessor(Target_java_lang_invoke_MemberName memberName) {
Expand All @@ -295,7 +298,10 @@ private static SubstrateAccessor getAccessor(Target_java_lang_invoke_MemberName
}

/**
 * Verifies that {@code args} carries exactly {@code expectedLength} arguments.
 *
 * A {@code null} array is treated as an empty argument list. It is therefore accepted when
 * {@code expectedLength} is 0 and reported as a length mismatch otherwise, instead of failing
 * with a {@code NullPointerException} on {@code args.length}. The error message is assembled
 * only on the failure path.
 *
 * @param args the argument array to validate; may be {@code null}
 * @param expectedLength the number of arguments the operation requires
 * @param methodName name of the operation, used only in the error message
 * @throws RuntimeException the error produced by {@code VMError.shouldNotReachHere} when the
 *             argument count does not match
 */
private static void checkArgs(Object[] args, int expectedLength, String methodName) {
    int actualLength = (args == null) ? 0 : args.length;
    if (actualLength == expectedLength) {
        return;
    }
    throw VMError.shouldNotReachHere(methodName + " requires exactly " + expectedLength + " arguments");
}

private static void convertArgs(Object[] args, MethodType methodType) throws Throwable {
Expand Down
Loading