Skip to content

I gained a 50% performance increase by merging rejected changes #598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions src/main/java/net/vulkanmod/gl/GlTexture.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public class GlTexture {
private static final Int2ReferenceOpenHashMap<GlTexture> map = new Int2ReferenceOpenHashMap<>();
private static int boundTextureId = 0;
private static GlTexture boundTexture;
private static GlTexture defaultTexture;
private static int activeTexture = 0;

private static int unpackRowLength;
Expand All @@ -39,11 +40,26 @@ public static int genTextureId() {
return id;
}

/**
 * Returns the shared fallback texture, creating and caching it on the first call.
 *
 * <p>Default texture can vary depending on graphic drivers, but it's usually a square that is
 * either white or black. This implementation backs it with
 * {@code VulkanImage.createWhiteTexture()} and registers it under texture id 0.
 *
 * @return the cached default texture; never {@code null} once this method has returned
 */
private static GlTexture getDefaultTexture() {
    if (GlTexture.defaultTexture == null) {
        // First use: build the id-0 texture and remember it in the static field.
        final GlTexture fallback = new GlTexture(0);
        fallback.vulkanImage = VulkanImage.createWhiteTexture();
        GlTexture.defaultTexture = fallback;
    }
    return GlTexture.defaultTexture;
}

public static void bindTexture(int id) {
boundTextureId = id;
boundTexture = map.get(id);
if (id == 0) {
boundTexture = getDefaultTexture();
} else {
boundTexture = map.get(id);
}

if (id <= 0)
if (id < 0)
return;

if (boundTexture == null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,10 @@ private int getMask(Vec3 camera, RenderSection section) {
public void bindBuffers(VkCommandBuffer commandBuffer, Pipeline pipeline, TerrainRenderType terrainRenderType, double camX, double camY, double camZ) {
try (MemoryStack stack = MemoryStack.stackPush()) {
var vertexBuffer = getAreaBuffer(terrainRenderType);
nvkCmdBindVertexBuffers(commandBuffer, 0, 1, stack.npointer(vertexBuffer.getId()), stack.npointer(0));
// VkBuffer is either a 64-bit unsigned integer or a 64-bit pointer, and
// VkDeviceSize is always a uint64_t. Both map to a Java "long",
// so use "stack.nlong" here rather than "stack.npointer".
nvkCmdBindVertexBuffers(commandBuffer, 0, 1, stack.nlong(vertexBuffer.getId()), stack.nlong(0));
updateChunkAreaOrigin(commandBuffer, pipeline, camX, camY, camZ, stack);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public void copyBuffer(Buffer src, long srcOffset, Buffer dst, long dstOffset, l
public void syncUploads() {
submitUploads();

Synchronization.INSTANCE.waitFences();
Synchronization.INSTANCE.recycleCmdBuffers();
}

private void beginCommands() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public void submitCommands() {
return;
}

long fence = queue.submitCommands(this.currentCmdBuffer);
queue.submitCommands(this.currentCmdBuffer);
Synchronization.INSTANCE.addCommandBuffer(this.currentCmdBuffer);

this.currentCmdBuffer = null;
Expand Down
186 changes: 114 additions & 72 deletions src/main/java/net/vulkanmod/vulkan/Renderer.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import net.vulkanmod.vulkan.memory.MemoryManager;
import net.vulkanmod.vulkan.pass.DefaultMainPass;
import net.vulkanmod.vulkan.pass.MainPass;
import net.vulkanmod.vulkan.queue.Queue;
import net.vulkanmod.vulkan.shader.GraphicsPipeline;
import net.vulkanmod.vulkan.shader.Pipeline;
import net.vulkanmod.vulkan.shader.PipelineState;
Expand Down Expand Up @@ -54,6 +55,7 @@ public class Renderer {

private static boolean swapChainUpdate = false;
public static boolean skipRendering = false;
private static final boolean sync2 = DeviceManager.checkExt(KHRSynchronization2.VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);

public static void initRenderer() {
INSTANCE = new Renderer();
Expand Down Expand Up @@ -88,7 +90,7 @@ public static int getCurrentImage() {
private List<VkCommandBuffer> commandBuffers;
private ArrayList<Long> imageAvailableSemaphores;
private ArrayList<Long> renderFinishedSemaphores;
private ArrayList<Long> inFlightFences;
private long inFlightSubmits;

private Framebuffer boundFramebuffer;
private RenderPass boundRenderPass;
Expand Down Expand Up @@ -164,33 +166,25 @@ private void allocateCommandBuffers() {
private void createSyncObjects() {
imageAvailableSemaphores = new ArrayList<>(framesNum);
renderFinishedSemaphores = new ArrayList<>(framesNum);
inFlightFences = new ArrayList<>(framesNum);

try (MemoryStack stack = stackPush()) {

VkSemaphoreCreateInfo semaphoreInfo = VkSemaphoreCreateInfo.calloc(stack);
semaphoreInfo.sType(VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO);

VkFenceCreateInfo fenceInfo = VkFenceCreateInfo.calloc(stack);
fenceInfo.sType(VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
fenceInfo.flags(VK_FENCE_CREATE_SIGNALED_BIT);

LongBuffer pImageAvailableSemaphore = stack.mallocLong(1);
LongBuffer pRenderFinishedSemaphore = stack.mallocLong(1);
LongBuffer pFence = stack.mallocLong(1);

for (int i = 0; i < framesNum; i++) {

if (vkCreateSemaphore(device, semaphoreInfo, null, pImageAvailableSemaphore) != VK_SUCCESS
|| vkCreateSemaphore(device, semaphoreInfo, null, pRenderFinishedSemaphore) != VK_SUCCESS
|| vkCreateFence(device, fenceInfo, null, pFence) != VK_SUCCESS) {
|| vkCreateSemaphore(device, semaphoreInfo, null, pRenderFinishedSemaphore) != VK_SUCCESS) {

throw new RuntimeException("Failed to create synchronization objects for the frame: " + i);
}

imageAvailableSemaphores.add(pImageAvailableSemaphore.get(0));
renderFinishedSemaphores.add(pRenderFinishedSemaphore.get(0));
inFlightFences.add(pFence.get(0));

}

Expand All @@ -206,7 +200,7 @@ public void preInitFrame() {
// runTick might be called recursively,
// this check forces sync to avoid upload corruption
if (lastReset == currentFrame) {
waitFences();
submitPending();
}
lastReset = currentFrame;

Expand Down Expand Up @@ -238,20 +232,27 @@ public void beginFrame() {
if (skipRendering || recordingCmds)
return;

vkWaitForFences(device, inFlightFences.get(currentFrame), true, VUtil.UINT64_MAX);
try (MemoryStack stack = stackPush()) {

p.pop();
p.push("Begin_rendering");
//Wait on previous frame's submit: (framesNum - 1) is equal to Max Frames in Flight
// Mimics Array and Current Frame index by "Stepping Back" in the timeline

MemoryManager.getInstance().initFrame(currentFrame);
drawer.setCurrentFrame(currentFrame);
//TODO: Possible Nvidia VSync bug: stutter when submitting > 1 Submits in Flight (when in FIFO mode)
final int maxFiF = swapChain.isVsync() ? 1 : framesNum - 1;
DeviceManager.getGraphicsQueue().waitSubmits(stack, Math.max(0L, inFlightSubmits - maxFiF));
//Testing using Graphics Timeline as a substitute for inFlightFences
//Aggregate frame fences and Graphics Queue fences together as one

resetDescriptors();

currentCmdBuffer = commandBuffers.get(currentFrame);
vkResetCommandBuffer(currentCmdBuffer, 0);
p.pop();
p.push("Begin_rendering");

try (MemoryStack stack = stackPush()) {
MemoryManager.getInstance().initFrame(currentFrame);
drawer.setCurrentFrame(currentFrame);

resetDescriptors();

currentCmdBuffer = commandBuffers.get(currentFrame);

IntBuffer pImageIndex = stack.mallocInt(1);

Expand Down Expand Up @@ -303,7 +304,7 @@ public void endFrame() {

mainPass.end(currentCmdBuffer);

waitFences();
submitPending();

submitFrame();
recordingCmds = false;
Expand All @@ -317,23 +318,12 @@ private void submitFrame() {
return;

try (MemoryStack stack = stackPush()) {
int vkResult;

VkSubmitInfo submitInfo = VkSubmitInfo.calloc(stack);
submitInfo.sType(VK_STRUCTURE_TYPE_SUBMIT_INFO);
// Wait on the host for async transfers to finish, to avoid invalid frees (destroying a buffer while it is still in use)
DeviceManager.getTransferQueue().waitSubmits(stack);

submitInfo.waitSemaphoreCount(1);
submitInfo.pWaitSemaphores(stack.longs(imageAvailableSemaphores.get(currentFrame)));
submitInfo.pWaitDstStageMask(stack.ints(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT));
submitInfo.pSignalSemaphores(stack.longs(renderFinishedSemaphores.get(currentFrame)));
submitInfo.pCommandBuffers(stack.pointers(currentCmdBuffer));

vkResetFences(device, inFlightFences.get(currentFrame));

if ((vkResult = vkQueueSubmit(DeviceManager.getGraphicsQueue().queue(), submitInfo, inFlightFences.get(currentFrame))) != VK_SUCCESS) {
vkResetFences(device, inFlightFences.get(currentFrame));
throw new RuntimeException("Failed to submit draw command buffer: %s".formatted(VkResult.decode(vkResult)));
}
final long submitId = sync2 ? getSubmitId2(stack) : getSubmitId(stack);

VkPresentInfoKHR presentInfo = VkPresentInfoKHR.calloc(stack);
presentInfo.sType(VK_STRUCTURE_TYPE_PRESENT_INFO_KHR);
Expand All @@ -345,7 +335,7 @@ private void submitFrame() {

presentInfo.pImageIndices(stack.ints(imageIndex));

vkResult = vkQueuePresentKHR(DeviceManager.getPresentQueue().queue(), presentInfo);
final int vkResult = vkQueuePresentKHR(DeviceManager.getPresentQueue().queue(), presentInfo);

if (vkResult == VK_ERROR_OUT_OF_DATE_KHR || vkResult == VK_SUBOPTIMAL_KHR || swapChainUpdate) {
swapChainUpdate = true;
Expand All @@ -355,7 +345,88 @@ private void submitFrame() {
}

currentFrame = (currentFrame + 1) % framesNum;

inFlightSubmits = submitId;

}
}

/**
 * Submits the current command buffer through {@code vkQueueSubmit} and returns the graphics
 * queue's submit count read after the submission.
 *
 * <p>Workaround used to fix macOS compatibility: LWJGL 3.3.3 uses an outdated MVK version,
 * which doesn't support Sync2 (LWJGL 3.3.4 is required for Sync2 on MoltenVK). Cannot be used
 * on Nvidia because it relies on host sync, which destabilizes VSync.
 *
 * <p>Remove when Mojang updates to LWJGL 3.3.4+ (allowing Sync2 support on macOS).
 *
 * @param stack memory stack that backs the temporary Vulkan structs and buffers
 * @return the value of {@code submitCount()} on the graphics queue after the submit
 * @throws RuntimeException if {@code vkQueueSubmit} returns anything other than
 *     {@code VK_SUCCESS}
 */
private long getSubmitId(MemoryStack stack) {
    final Queue gfxQueue = DeviceManager.getGraphicsQueue();

    // Can't sync the GPU fully w/o Sync2, must use Host sync instead (Breaks Nvidia VSync)
    gfxQueue.waitSubmits(stack);

    // Signal values are positional: slot 0 pairs with the render-finished semaphore and
    // slot 1 with the semaphore returned by getTmSemaphore() in pSignalSemaphores below.
    final VkTimelineSemaphoreSubmitInfo timelineInfo = VkTimelineSemaphoreSubmitInfo.calloc(stack)
            .sType$Default()
            .pSignalSemaphoreValues(stack.longs(0, gfxQueue.submitCountAdd()));

    final VkSubmitInfo submit = VkSubmitInfo.calloc(stack)
            .sType(VK_STRUCTURE_TYPE_SUBMIT_INFO)
            .pNext(timelineInfo)
            .waitSemaphoreCount(1)
            .pWaitSemaphores(stack.longs(imageAvailableSemaphores.get(currentFrame)))
            .pWaitDstStageMask(stack.ints(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT))
            .pSignalSemaphores(stack.longs(renderFinishedSemaphores.get(currentFrame), gfxQueue.getTmSemaphore()))
            .pCommandBuffers(stack.pointers(currentCmdBuffer));

    // No fence handle is passed (0).
    final int result = vkQueueSubmit(gfxQueue.queue(), submit, 0);
    if (result != VK_SUCCESS) {
        throw new RuntimeException("Failed to submit draw command buffer: %s".formatted(VkResult.decode(result)));
    }

    return gfxQueue.submitCount();
}

/**
 * Submits the current command buffer through {@code vkQueueSubmit2KHR} and returns the graphics
 * queue's submit count read after the submission.
 *
 * <p>Used to fix VSync stability on Nvidia; used when Sync2 is supported (most Vk1.2 systems).
 *
 * <p>The submit waits on the current frame's image-available semaphore and on the queue
 * semaphore at the current {@code submitCount()}. It signals the current frame's
 * render-finished semaphore and the queue semaphore at the value returned by
 * {@code submitCountAdd()}.
 *
 * @param stack memory stack that backs the temporary Vulkan structs
 * @return the value of {@code submitCount()} on the graphics queue after the submit
 * @throws RuntimeException if {@code vkQueueSubmit2KHR} returns anything other than
 *     {@code VK_SUCCESS}
 */
private long getSubmitId2(MemoryStack stack) {
    final Queue gfxQueue = DeviceManager.getGraphicsQueue();

    final VkCommandBufferSubmitInfo.Buffer cmdInfos = VkCommandBufferSubmitInfo.calloc(1, stack)
            .sType$Default()
            .commandBuffer(currentCmdBuffer);

    // Nvidia; Replace fence waits with a submit barrier: restoring VSync stability on Nvidia
    final VkSemaphoreSubmitInfo.Buffer waitInfos = VkSemaphoreSubmitInfo.calloc(2, stack);

    final VkSemaphoreSubmitInfo imageAvailableWait = waitInfos.get(0);
    imageAvailableWait.sType$Default();
    imageAvailableWait.semaphore(imageAvailableSemaphores.get(currentFrame));
    imageAvailableWait.stageMask(VK13.VK_PIPELINE_STAGE_2_CLEAR_BIT); // Attachment Clears
    imageAvailableWait.value(0);

    // The wait value must be read before submitCountAdd() is called for the signal value below.
    final VkSemaphoreSubmitInfo previousSubmitWait = waitInfos.get(1);
    previousSubmitWait.sType$Default();
    previousSubmitWait.semaphore(gfxQueue.getTmSemaphore());
    previousSubmitWait.stageMask(VK13.VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT); // LightMap Sampler Transitions
    previousSubmitWait.value(gfxQueue.submitCount());

    final VkSemaphoreSubmitInfo.Buffer signalInfos = VkSemaphoreSubmitInfo.calloc(2, stack);

    final VkSemaphoreSubmitInfo renderFinishedSignal = signalInfos.get(0);
    renderFinishedSignal.sType$Default();
    renderFinishedSignal.semaphore(renderFinishedSemaphores.get(currentFrame));
    renderFinishedSignal.stageMask(VK13.VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT);
    renderFinishedSignal.value(0);

    final VkSemaphoreSubmitInfo queueSignal = signalInfos.get(1);
    queueSignal.sType$Default();
    queueSignal.semaphore(gfxQueue.getTmSemaphore());
    queueSignal.stageMask(VK13.VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT);
    queueSignal.value(gfxQueue.submitCountAdd());

    final VkSubmitInfo2.Buffer submit = VkSubmitInfo2.calloc(1, stack)
            .sType$Default()
            .pWaitSemaphoreInfos(waitInfos)
            .pSignalSemaphoreInfos(signalInfos)
            .pCommandBufferInfos(cmdInfos);

    // No fence handle is passed (0).
    final int result = KHRSynchronization2.vkQueueSubmit2KHR(gfxQueue.queue(), submit, 0);
    if (result != VK_SUCCESS) {
        throw new RuntimeException("Failed to submit draw command buffer: %s".formatted(VkResult.decode(result)));
    }

    return gfxQueue.submitCount();
}

/**
Expand All @@ -366,26 +437,15 @@ public void flushCmds() {
return;

try (MemoryStack stack = stackPush()) {
int vkResult;

this.endRenderPass(currentCmdBuffer);
vkEndCommandBuffer(currentCmdBuffer);

VkSubmitInfo submitInfo = VkSubmitInfo.calloc(stack);
submitInfo.sType(VK_STRUCTURE_TYPE_SUBMIT_INFO);

submitInfo.pCommandBuffers(stack.pointers(currentCmdBuffer));
submitPending();

vkResetFences(device, inFlightFences.get(currentFrame));
final long submitId = sync2 ? getSubmitId2(stack) : getSubmitId(stack);

waitFences();

if ((vkResult = vkQueueSubmit(DeviceManager.getGraphicsQueue().queue(), submitInfo, inFlightFences.get(currentFrame))) != VK_SUCCESS) {
vkResetFences(device, inFlightFences.get(currentFrame));
throw new RuntimeException("Failed to submit draw command buffer: %s".formatted(VkResult.decode(vkResult)));
}

vkWaitForFences(device, inFlightFences.get(currentFrame), true, VUtil.UINT64_MAX);
DeviceManager.getGraphicsQueue().waitSubmits(stack, submitId);

this.beginRenderPass(stack);
}
Expand Down Expand Up @@ -433,11 +493,10 @@ public void addUsedPipeline(Pipeline pipeline) {
public void removeUsedPipeline(Pipeline pipeline) {
usedPipelines.remove(pipeline);
}

private void waitFences() {
// Make sure there are no uploads/transitions scheduled
//Synchronization fences are merged into vkQueueSubmit2 submit Barrier, reducing sync overhead and improving frametime
private void submitPending() {
ImageUploadHelper.INSTANCE.submitCommands();
Synchronization.INSTANCE.waitFences();
Synchronization.INSTANCE.recycleCmdBuffers();
Vulkan.getStagingBuffer().reset();
}

Expand All @@ -451,25 +510,9 @@ private void resetDescriptors() {
boundPipelineHandle = 0;
}

/**
 * Submits a batch with no command buffers to the graphics queue, using the current frame's
 * in-flight fence, and then blocks on the host until that fence is signaled.
 */
void waitForSwapChain() {
// Reset the current frame's fence before handing it to the submit below.
vkResetFences(device, inFlightFences.get(currentFrame));

// constexpr VkPipelineStageFlags t=VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
try (MemoryStack stack = MemoryStack.stackPush()) {
//Empty Submit
// NOTE(review): waitSemaphoreCount is never set on this struct, whereas the other submit
// paths in this file call waitSemaphoreCount(1) explicitly. Confirm whether the wait on
// the image-available semaphore actually takes effect here.
VkSubmitInfo info = VkSubmitInfo.calloc(stack)
.sType$Default()
.pWaitSemaphores(stack.longs(imageAvailableSemaphores.get(currentFrame)))
.pWaitDstStageMask(stack.ints(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT));

// The return code of the submit is not checked.
vkQueueSubmit(DeviceManager.getGraphicsQueue().queue(), info, inFlightFences.get(currentFrame));
// A timeout of -1 has every bit set, i.e. the same bit pattern as an unsigned 64-bit maximum.
vkWaitForFences(device, inFlightFences.get(currentFrame), true, -1);
}
}

@SuppressWarnings("UnreachableCode")
private void recreateSwapChain() {
waitFences();
submitPending();
Vulkan.waitIdle();

commandBuffers.forEach(commandBuffer -> vkResetCommandBuffer(commandBuffer, 0));
Expand Down Expand Up @@ -518,7 +561,6 @@ public void cleanUpResources() {

private void destroySyncObjects() {
for (int i = 0; i < framesNum; ++i) {
vkDestroyFence(device, inFlightFences.get(i), null);
vkDestroySemaphore(device, imageAvailableSemaphores.get(i), null);
vkDestroySemaphore(device, renderFinishedSemaphores.get(i), null);
}
Expand Down
Loading