//
// Copyright 2016 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// BufferVk.cpp:
//    Implements the class methods for BufferVk.
//

#include "libANGLE/renderer/vulkan/BufferVk.h"

#include "common/FixedVector.h"
#include "common/debug.h"
#include "common/mathutil.h"
#include "common/utilities.h"
#include "libANGLE/Context.h"
#include "libANGLE/renderer/vulkan/ContextVk.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"

namespace rx
{
VkBufferUsageFlags GetDefaultBufferUsageFlags(RendererVk *renderer)
{
    // We could potentially use multiple backing buffers for different usages.
    // For now keep a single buffer with all relevant usage flags.
    VkBufferUsageFlags defaultBufferUsageFlags =
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
        VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
        VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
    if (renderer->getFeatures().supportsTransformFeedbackExtension.enabled)
    {
        defaultBufferUsageFlags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
                                   VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
    }
    return defaultBufferUsageFlags;
}

namespace
{
constexpr VkMemoryPropertyFlags kDeviceLocalFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
constexpr VkMemoryPropertyFlags kDeviceLocalHostCoherentFlags =
    (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
constexpr VkMemoryPropertyFlags kHostCachedFlags =
    (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
     VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
constexpr VkMemoryPropertyFlags kHostUncachedFlags =
    (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

// Vertex attribute buffers are used as storage buffers for conversion in compute, where access to
// the buffer is made in 4-byte chunks. Assume the size of the buffer is 4k+n where n is in [0, 3).
// On some hardware, reading 4 bytes from address 4k returns 0, making it impossible to read the
// last n bytes. By rounding up the buffer sizes to a multiple of 4, the problem is alleviated.
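// For example, a 1001-byte buffer is allocated as 1004 bytes, so a 4-byte read covering the final
// byte stays within the allocation.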
constexpr size_t kBufferSizeGranularity = 4;
static_assert(gl::isPow2(kBufferSizeGranularity), "use as alignment, must be power of two");

// Start with a fairly small buffer size. We can increase this dynamically as we convert more data.
constexpr size_t kConvertedArrayBufferInitialSize = 1024 * 8;

// Buffers that have a static usage pattern will be allocated in
// device local memory to speed up access to and from the GPU.
// Buffers with a dynamic usage pattern, or that are frequently mapped,
// will request host cached memory to speed up access from the CPU.
VkMemoryPropertyFlags GetPreferredMemoryType(RendererVk *renderer,
                                             gl::BufferBinding target,
                                             gl::BufferUsage usage)
{
    if (target == gl::BufferBinding::PixelUnpack)
    {
        return kHostCachedFlags;
    }

    switch (usage)
    {
        case gl::BufferUsage::StaticCopy:
        case gl::BufferUsage::StaticDraw:
        case gl::BufferUsage::StaticRead:
            // For static usage, request device local memory.
            return renderer->getFeatures().preferDeviceLocalMemoryHostVisible.enabled
                       ? kDeviceLocalHostCoherentFlags
                       : kDeviceLocalFlags;
        case gl::BufferUsage::DynamicDraw:
        case gl::BufferUsage::StreamDraw:
            // For non-static usage where the CPU performs a write-only access, request
            // host uncached memory.
            return kHostUncachedFlags;
        case gl::BufferUsage::DynamicCopy:
        case gl::BufferUsage::DynamicRead:
        case gl::BufferUsage::StreamCopy:
        case gl::BufferUsage::StreamRead:
            // For all other types of usage, request host cached memory.
            return kHostCachedFlags;
        default:
            UNREACHABLE();
            return kHostCachedFlags;
    }
}

VkMemoryPropertyFlags GetStorageMemoryType(RendererVk *renderer,
                                           GLbitfield storageFlags,
                                           bool externalBuffer)
{
    const bool hasMapAccess =
        (storageFlags & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT_EXT)) != 0;

    if (renderer->getFeatures().preferDeviceLocalMemoryHostVisible.enabled)
    {
        const bool canUpdate = (storageFlags & GL_DYNAMIC_STORAGE_BIT_EXT) != 0;
        if (canUpdate || hasMapAccess || externalBuffer)
        {
            // We currently allocate coherent memory for persistently mapped buffers.
            // GL_EXT_buffer_storage allows non-coherent memory, but currently the implementation
            // of |glMemoryBarrier(CLIENT_MAPPED_BUFFER_BARRIER_BIT_EXT)| relies on the mapping
            // being coherent.
            //
            // If persistently mapped buffers ever use non-coherent memory, then said
            // |glMemoryBarrier| call must result in |vkInvalidateMappedMemoryRanges| for all
            // persistently mapped buffers.
            return kDeviceLocalHostCoherentFlags;
        }
        return kDeviceLocalFlags;
    }

    return hasMapAccess ? kHostCachedFlags : kDeviceLocalFlags;
}

bool ShouldAllocateNewMemoryForUpdate(ContextVk *contextVk, size_t subDataSize, size_t bufferSize)
{
    // A sub data update with size > 50% of buffer size meets the threshold
    // to acquire a new BufferHelper from the pool.
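    // For example, a 3000-byte update to a 4096-byte buffer (~73%) meets the threshold, while a
    // 1024-byte update (25%) does not, unless preferCPUForBufferSubData is enabled.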
    return contextVk->getRenderer()->getFeatures().preferCPUForBufferSubData.enabled ||
           subDataSize > (bufferSize / 2);
}

bool ShouldUseCPUToCopyData(ContextVk *contextVk,
                            const vk::BufferHelper &buffer,
                            size_t copySize,
                            size_t bufferSize)
{
    RendererVk *renderer = contextVk->getRenderer();

    // If the buffer is not host-visible, or if it's busy on the GPU, the CPU cannot read from it.
    if (!buffer.isHostVisible() || !renderer->hasResourceUseFinished(buffer.getWriteResourceUse()))
    {
        return false;
    }

    // For some GPUs (e.g. ARM) we always prefer using the CPU to do the copy instead of the GPU,
    // to avoid pipeline bubbles. If the GPU is currently busy and the data copy size is below a
    // certain threshold, we choose to use the CPU to do the copy over the GPU to achieve better
    // parallelism.
    return renderer->getFeatures().preferCPUForBufferSubData.enabled ||
           (renderer->isCommandQueueBusy() &&
            copySize < renderer->getMaxCopyBytesUsingCPUWhenPreservingBufferData());
}

bool RenderPassUsesBufferForReadOnly(ContextVk *contextVk, const vk::BufferHelper &buffer)
{
    if (!contextVk->hasActiveRenderPass())
    {
        return false;
    }

    vk::RenderPassCommandBufferHelper &renderPassCommands =
        contextVk->getStartedRenderPassCommands();
    return renderPassCommands.usesBuffer(buffer) && !renderPassCommands.usesBufferForWrite(buffer);
}

// If a render pass is open that uses the buffer in read-only mode, a render pass break can be
// avoided by using acquireAndUpdate. This can however be costly if the update is very small, so
// it is limited to platforms where a render pass break is itself costly (i.e. tile-based
// renderers).
bool ShouldAvoidRenderPassBreakOnUpdate(ContextVk *contextVk,
                                        const vk::BufferHelper &buffer,
                                        size_t bufferSize)
{
    // Only avoid breaking the render pass if the buffer is not so big that duplicating it would
    // outweigh the cost of breaking the render pass. A value of 1KB is temporarily chosen as a
    // heuristic, and can be adjusted when such a situation is encountered.
    constexpr size_t kPreferDuplicateOverRenderPassBreakMaxBufferSize = 1024;
    if (!contextVk->getFeatures().preferCPUForBufferSubData.enabled ||
        bufferSize > kPreferDuplicateOverRenderPassBreakMaxBufferSize)
    {
        return false;
    }

    return RenderPassUsesBufferForReadOnly(contextVk, buffer);
}

BufferUsageType GetBufferUsageType(gl::BufferUsage usage)
{
    return (usage == gl::BufferUsage::DynamicDraw || usage == gl::BufferUsage::DynamicCopy ||
            usage == gl::BufferUsage::DynamicRead)
               ? BufferUsageType::Dynamic
               : BufferUsageType::Static;
}

angle::Result GetMemoryTypeIndex(ContextVk *contextVk,
                                 VkDeviceSize size,
                                 VkMemoryPropertyFlags memoryPropertyFlags,
                                 uint32_t *memoryTypeIndexOut)
{
    RendererVk *renderer = contextVk->getRenderer();
    const vk::Allocator &allocator = renderer->getAllocator();

    bool persistentlyMapped = renderer->getFeatures().persistentlyMappedBuffers.enabled;
    VkBufferUsageFlags defaultBufferUsageFlags = GetDefaultBufferUsageFlags(renderer);

    VkBufferCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags = 0;
    createInfo.size = size;
    createInfo.usage = defaultBufferUsageFlags;
    createInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices = nullptr;

    // Host visible is required; all other bits are preferred (i.e., optional).
    VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    VkMemoryPropertyFlags preferredFlags =
        (memoryPropertyFlags & (~VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));

    // Check that the allocation is not too large.
    uint32_t memoryTypeIndex = 0;
    ANGLE_VK_TRY(contextVk, allocator.findMemoryTypeIndexForBufferInfo(
                                createInfo, requiredFlags, preferredFlags, persistentlyMapped,
                                &memoryTypeIndex));
    *memoryTypeIndexOut = memoryTypeIndex;

    return angle::Result::Continue;
}

bool IsSelfCopy(const BufferDataSource &dataSource, const vk::BufferHelper &destination)
{
    return dataSource.data == nullptr &&
           dataSource.buffer->getBufferSerial() == destination.getBufferSerial();
}
}  // namespace

// ConversionBuffer implementation.
ConversionBuffer::ConversionBuffer(RendererVk *renderer,
                                   VkBufferUsageFlags usageFlags,
                                   size_t initialSize,
                                   size_t alignment,
                                   bool hostVisible)
    : dirty(true)
{
    data = std::make_unique<vk::BufferHelper>();
}

ConversionBuffer::~ConversionBuffer()
{
    ASSERT(!data || !data->valid());
}

ConversionBuffer::ConversionBuffer(ConversionBuffer &&other) = default;

// BufferVk::VertexConversionBuffer implementation.
BufferVk::VertexConversionBuffer::VertexConversionBuffer(RendererVk *renderer,
                                                         angle::FormatID formatIDIn,
                                                         GLuint strideIn,
                                                         size_t offsetIn,
                                                         bool hostVisible)
    : ConversionBuffer(renderer,
                       vk::kVertexBufferUsageFlags,
                       kConvertedArrayBufferInitialSize,
                       vk::kVertexBufferAlignment,
                       hostVisible),
      formatID(formatIDIn),
      stride(strideIn),
      offset(offsetIn)
{}

BufferVk::VertexConversionBuffer::VertexConversionBuffer(VertexConversionBuffer &&other) = default;

BufferVk::VertexConversionBuffer::~VertexConversionBuffer() = default;

// BufferVk implementation.
BufferVk::BufferVk(const gl::BufferState &state)
    : BufferImpl(state),
      mClientBuffer(nullptr),
      mMemoryTypeIndex(0),
      mMemoryPropertyFlags(0),
      mIsStagingBufferMapped(false),
      mHasValidData(false),
      mIsMappedForWrite(false),
      mUsageType(BufferUsageType::Static),
      mMappedOffset(0),
      mMappedLength(0)
{}

BufferVk::~BufferVk() {}

void BufferVk::destroy(const gl::Context *context)
{
    ContextVk *contextVk = vk::GetImpl(context);

    release(contextVk);
}

void BufferVk::release(ContextVk *contextVk)
{
    RendererVk *renderer = contextVk->getRenderer();
    if (mBuffer.valid())
    {
        mBuffer.releaseBufferAndDescriptorSetCache(contextVk);
    }
    if (mStagingBuffer.valid())
    {
        mStagingBuffer.release(renderer);
    }

    for (ConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.data->release(renderer);
    }
    mVertexConversionBuffers.clear();
}

angle::Result BufferVk::setExternalBufferData(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags)
{
    ContextVk *contextVk = vk::GetImpl(context);

    // Release and re-create the memory and buffer.
    release(contextVk);

    VkBufferCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags = 0;
    createInfo.size = size;
    createInfo.usage = GetDefaultBufferUsageFlags(contextVk->getRenderer());
    createInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices = nullptr;

    return mBuffer.initExternal(contextVk, memoryPropertyFlags, createInfo, clientBuffer);
}

angle::Result BufferVk::setDataWithUsageFlags(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              const void *data,
                                              size_t size,
                                              gl::BufferUsage usage,
                                              GLbitfield flags)
{
    ContextVk *contextVk = vk::GetImpl(context);
    VkMemoryPropertyFlags memoryPropertyFlags = 0;
    bool persistentMapRequired = false;
    const bool isExternalBuffer = clientBuffer != nullptr;

    switch (usage)
    {
        case gl::BufferUsage::InvalidEnum:
        {
            // glBufferStorage API call
            memoryPropertyFlags =
                GetStorageMemoryType(contextVk->getRenderer(), flags, isExternalBuffer);
            persistentMapRequired = (flags & GL_MAP_PERSISTENT_BIT_EXT) != 0;
            break;
        }
        default:
        {
            // glBufferData API call
            memoryPropertyFlags = GetPreferredMemoryType(contextVk->getRenderer(), target, usage);
            break;
        }
    }

    if (isExternalBuffer)
    {
        ANGLE_TRY(setExternalBufferData(context, target, clientBuffer, size, memoryPropertyFlags));
        if (!mBuffer.isHostVisible())
        {
            // If the external buffer's memory does not have the host-visible property, we cannot
            // support a persistent map request.
            ANGLE_VK_CHECK(contextVk, !persistentMapRequired, VK_ERROR_MEMORY_MAP_FAILED);
        }

        mClientBuffer = clientBuffer;

        return angle::Result::Continue;
    }
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, usage);
}

angle::Result BufferVk::setData(const gl::Context *context,
                                gl::BufferBinding target,
                                const void *data,
                                size_t size,
                                gl::BufferUsage usage)
{
    ContextVk *contextVk = vk::GetImpl(context);
    // Assume host visible/coherent memory is available.
    VkMemoryPropertyFlags memoryPropertyFlags =
        GetPreferredMemoryType(contextVk->getRenderer(), target, usage);
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, usage);
}

angle::Result BufferVk::setDataWithMemoryType(const gl::Context *context,
                                              gl::BufferBinding target,
                                              const void *data,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags,
                                              gl::BufferUsage usage)
{
    ContextVk *contextVk = vk::GetImpl(context);
    RendererVk *renderer = contextVk->getRenderer();

    // Since the buffer is being entirely reinitialized, reset the valid-data flag. If the caller
    // passed in data to fill the buffer, the flag will be updated when the data is copied to the
    // buffer.
    mHasValidData = false;

    if (size == 0)
    {
        // Nothing to do.
        return angle::Result::Continue;
    }

    const BufferUsageType usageType = GetBufferUsageType(usage);
    const BufferUpdateType updateType =
        calculateBufferUpdateTypeOnFullUpdate(renderer, size, memoryPropertyFlags, usageType, data);

    if (updateType == BufferUpdateType::StorageRedefined)
    {
        mUsageType = usageType;
        mMemoryPropertyFlags = memoryPropertyFlags;
        ANGLE_TRY(GetMemoryTypeIndex(contextVk, size, memoryPropertyFlags, &mMemoryTypeIndex));
        ANGLE_TRY(acquireBufferHelper(contextVk, size, mUsageType));
    }

    if (data != nullptr)
    {
        BufferDataSource dataSource = {};
        dataSource.data = data;

        // Handle full-buffer updates similarly to glBufferSubData.
        ANGLE_TRY(setDataImpl(contextVk, size, dataSource, size, 0, updateType));
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::setSubData(const gl::Context *context,
                                   gl::BufferBinding target,
                                   const void *data,
                                   size_t size,
                                   size_t offset)
{
    ASSERT(mBuffer.valid());

    BufferDataSource dataSource = {};
    dataSource.data = data;

    ContextVk *contextVk = vk::GetImpl(context);
    return setDataImpl(contextVk, static_cast<size_t>(mState.getSize()), dataSource, size, offset,
                       BufferUpdateType::ContentsUpdate);
}

angle::Result BufferVk::copySubData(const gl::Context *context,
                                    BufferImpl *source,
                                    GLintptr sourceOffset,
                                    GLintptr destOffset,
                                    GLsizeiptr size)
{
    ASSERT(mBuffer.valid());

    ContextVk *contextVk = vk::GetImpl(context);
    BufferVk *sourceVk = GetAs<BufferVk>(source);

    BufferDataSource dataSource = {};
    dataSource.buffer = &sourceVk->getBuffer();
    dataSource.bufferOffset = static_cast<VkDeviceSize>(sourceOffset);

    ASSERT(dataSource.buffer->valid());

    return setDataImpl(contextVk, static_cast<size_t>(mState.getSize()), dataSource, size,
                       destOffset, BufferUpdateType::ContentsUpdate);
}

angle::Result BufferVk::allocStagingBuffer(ContextVk *contextVk,
                                           vk::MemoryCoherency coherency,
                                           VkDeviceSize size,
                                           uint8_t **mapPtr)
{
    ASSERT(!mIsStagingBufferMapped);

    if (mStagingBuffer.valid())
    {
        if (size <= mStagingBuffer.getSize() &&
            (coherency == vk::MemoryCoherency::Coherent) == mStagingBuffer.isCoherent() &&
            contextVk->getRenderer()->hasResourceUseFinished(mStagingBuffer.getResourceUse()))
        {
            // If the existing staging buffer is big enough and idle, just reuse it.
            *mapPtr = mStagingBuffer.getMappedMemory();
            mIsStagingBufferMapped = true;
            return angle::Result::Continue;
        }
        mStagingBuffer.release(contextVk->getRenderer());
    }

    ANGLE_TRY(
        mStagingBuffer.allocateForCopyBuffer(contextVk, static_cast<size_t>(size), coherency));
    *mapPtr = mStagingBuffer.getMappedMemory();
    mIsStagingBufferMapped = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::flushStagingBuffer(ContextVk *contextVk,
                                           VkDeviceSize offset,
                                           VkDeviceSize size)
{
    RendererVk *renderer = contextVk->getRenderer();

    ASSERT(mIsStagingBufferMapped);
    ASSERT(mStagingBuffer.valid());

    if (!mStagingBuffer.isCoherent())
    {
        ANGLE_TRY(mStagingBuffer.flush(renderer));
    }

    // Enqueue a copy command on the GPU.
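    // VkBufferCopy fields are {srcOffset, dstOffset, size}; here the source is the staging buffer
    // and the destination is mBuffer at |offset|.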
    VkBufferCopy copyRegion = {mStagingBuffer.getOffset(), mBuffer.getOffset() + offset, size};
    ANGLE_TRY(mBuffer.copyFromBuffer(contextVk, &mStagingBuffer, 1, &copyRegion));

    return angle::Result::Continue;
}

angle::Result BufferVk::handleDeviceLocalBufferMap(ContextVk *contextVk,
                                                   VkDeviceSize offset,
                                                   VkDeviceSize size,
                                                   uint8_t **mapPtr)
{
    ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::Coherent, size, mapPtr));

    // Copy data from the device-local buffer to the host-visible staging buffer.
    VkBufferCopy copyRegion = {mBuffer.getOffset() + offset, mStagingBuffer.getOffset(), size};
    ANGLE_TRY(mStagingBuffer.copyFromBuffer(contextVk, &mBuffer, 1, &copyRegion));
    ANGLE_TRY(mStagingBuffer.waitForIdle(contextVk, "GPU stall due to mapping device local buffer",
                                         RenderPassClosureReason::DeviceLocalBufferMap));
    // Because the staging buffer is coherent, there is no need to call invalidate here.

    return angle::Result::Continue;
}

angle::Result BufferVk::map(const gl::Context *context, GLenum access, void **mapPtr)
{
    ASSERT(mBuffer.valid());
    ASSERT(access == GL_WRITE_ONLY_OES);

    return mapImpl(vk::GetImpl(context), GL_MAP_WRITE_BIT, mapPtr);
}

angle::Result BufferVk::mapRange(const gl::Context *context,
                                 size_t offset,
                                 size_t length,
                                 GLbitfield access,
                                 void **mapPtr)
{
    ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::mapRange");
    return mapRangeImpl(vk::GetImpl(context), offset, length, access, mapPtr);
}

angle::Result BufferVk::mapImpl(ContextVk *contextVk, GLbitfield access, void **mapPtr)
{
    return mapRangeImpl(contextVk, 0, static_cast<VkDeviceSize>(mState.getSize()), access, mapPtr);
}

angle::Result BufferVk::ghostMappedBuffer(ContextVk *contextVk,
                                          VkDeviceSize offset,
                                          VkDeviceSize length,
                                          GLbitfield access,
                                          void **mapPtr)
{
    // We shouldn't get here if it is external memory.
    ASSERT(!isExternalBuffer());

    ++contextVk->getPerfCounters().buffersGhosted;

    // If we are creating a new buffer because the GPU is using it as read-only, then we
    // also need to copy the contents of the previous buffer into the new buffer, in
    // case the caller only updates a portion of the new buffer.
    vk::BufferHelper src = std::move(mBuffer);
    ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                  BufferUsageType::Dynamic));

    // Before returning the new buffer, map the previous buffer and copy its entire
    // contents into the new buffer.
    uint8_t *srcMapPtr = nullptr;
    uint8_t *dstMapPtr = nullptr;
    ANGLE_TRY(src.map(contextVk, &srcMapPtr));
    ANGLE_TRY(mBuffer.map(contextVk, &dstMapPtr));

    ASSERT(src.isCoherent());
    ASSERT(mBuffer.isCoherent());

    // No need to copy over [offset, offset + length), just the regions around it.
    if ((access & GL_MAP_INVALIDATE_RANGE_BIT) != 0)
    {
        if (offset != 0)
        {
            memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(offset));
        }
        size_t totalSize = static_cast<size_t>(mState.getSize());
        size_t remainingStart = static_cast<size_t>(offset + length);
        size_t remainingSize = totalSize - remainingStart;
        if (remainingSize != 0)
        {
            memcpy(dstMapPtr + remainingStart, srcMapPtr + remainingStart, remainingSize);
        }
    }
    else
    {
        memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(mState.getSize()));
    }

    src.releaseBufferAndDescriptorSetCache(contextVk);

    // Return the already-mapped pointer with the offset adjustment to avoid a call to unmap().
    *mapPtr = dstMapPtr + offset;

    return angle::Result::Continue;
}

angle::Result BufferVk::mapRangeImpl(ContextVk *contextVk,
                                     VkDeviceSize offset,
                                     VkDeviceSize length,
                                     GLbitfield access,
                                     void **mapPtr)
{
    RendererVk *renderer = contextVk->getRenderer();
    ASSERT(mBuffer.valid());

    // Record the map call parameters in case this call comes from ANGLE internals (the
    // access/offset/length may be inconsistent with mState).
    mIsMappedForWrite = (access & GL_MAP_WRITE_BIT) != 0;
    mMappedOffset = offset;
    mMappedLength = length;

    uint8_t **mapPtrBytes = reinterpret_cast<uint8_t **>(mapPtr);
    bool hostVisible = mBuffer.isHostVisible();

    // MAP_UNSYNCHRONIZED_BIT, so immediately map.
    if ((access & GL_MAP_UNSYNCHRONIZED_BIT) != 0)
    {
        if (hostVisible)
        {
            return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Read case
    if ((access & GL_MAP_WRITE_BIT) == 0)
    {
        // If the app is not going to write, all we need is to ensure the GPU write is finished.
        // Concurrent reads from the CPU and GPU are allowed.
        if (!renderer->hasResourceUseFinished(mBuffer.getWriteResourceUse()))
        {
            // If there are unflushed write commands for the resource, flush them.
            if (contextVk->hasUnsubmittedUse(mBuffer.getWriteResourceUse()))
            {
                ANGLE_TRY(
                    contextVk->flushImpl(nullptr, RenderPassClosureReason::BufferWriteThenMap));
            }
            ANGLE_TRY(renderer->finishResourceUse(contextVk, mBuffer.getWriteResourceUse()));
        }
        if (hostVisible)
        {
            return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case
    if (!hostVisible)
    {
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case, buffer not in use.
    if (isExternalBuffer() || !isCurrentlyInUse(contextVk->getRenderer()))
    {
        return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
    }

    // Write case, buffer in use.
    //
    // Here, we try to map the buffer, but it's busy. Instead of waiting for the GPU to
    // finish, we just allocate a new buffer if:
    // 1.) The caller has told us it doesn't care about the previous contents, or
    // 2.) The GPU won't write to the buffer.

    bool rangeInvalidate = (access & GL_MAP_INVALIDATE_RANGE_BIT) != 0;
    bool entireBufferInvalidated =
        ((access & GL_MAP_INVALIDATE_BUFFER_BIT) != 0) ||
        (rangeInvalidate && offset == 0 && static_cast<VkDeviceSize>(mState.getSize()) == length);

    if (entireBufferInvalidated)
    {
        ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                      BufferUsageType::Dynamic));
        return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
    }

    bool smallMapRange = (length < static_cast<VkDeviceSize>(mState.getSize()) / 2);

    if (smallMapRange && rangeInvalidate)
    {
        ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::NonCoherent,
                                     static_cast<size_t>(length), mapPtrBytes));
        return angle::Result::Continue;
    }

    if (renderer->hasResourceUseFinished(mBuffer.getWriteResourceUse()))
    {
        // This will keep the new buffer mapped and update mapPtr, so return immediately.
        return ghostMappedBuffer(contextVk, offset, length, access, mapPtr);
    }

    // Write case (worst case, buffer in use for write)
    ANGLE_TRY(mBuffer.waitForIdle(contextVk, "GPU stall due to mapping buffer in use by the GPU",
                                  RenderPassClosureReason::BufferInUseWhenSynchronizedMap));
    return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
}

angle::Result BufferVk::unmap(const gl::Context *context, GLboolean *result)
{
    ANGLE_TRY(unmapImpl(vk::GetImpl(context)));

    // This should be false if the contents have been corrupted through external means. Vulkan
    // doesn't provide such information.
    *result = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::unmapImpl(ContextVk *contextVk)
{
    ASSERT(mBuffer.valid());

    if (mIsStagingBufferMapped)
    {
        ASSERT(mStagingBuffer.valid());
        // The buffer is device-local, or this is the small-range-map optimization.
        if (mIsMappedForWrite)
        {
            ANGLE_TRY(flushStagingBuffer(contextVk, mMappedOffset, mMappedLength));
        }

        mIsStagingBufferMapped = false;
    }
    else
    {
        ASSERT(mBuffer.isHostVisible());
        mBuffer.unmap(contextVk->getRenderer());
    }

    if (mIsMappedForWrite)
    {
        dataUpdated();
    }

    // Reset the mapping parameters.
    mIsMappedForWrite = false;
    mMappedOffset = 0;
    mMappedLength = 0;

    return angle::Result::Continue;
}

angle::Result BufferVk::getSubData(const gl::Context *context,
                                   GLintptr offset,
                                   GLsizeiptr size,
                                   void *outData)
{
    ASSERT(offset + size <= getSize());
    ASSERT(mBuffer.valid());
    ContextVk *contextVk = vk::GetImpl(context);
    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, size, GL_MAP_READ_BIT, &mapPtr));
    memcpy(outData, mapPtr, size);
    return unmapImpl(contextVk);
}

angle::Result BufferVk::getIndexRange(const gl::Context *context,
                                      gl::DrawElementsType type,
                                      size_t offset,
                                      size_t count,
                                      bool primitiveRestartEnabled,
                                      gl::IndexRange *outRange)
{
    ContextVk *contextVk = vk::GetImpl(context);
    RendererVk *renderer = contextVk->getRenderer();

    // This is a workaround for the mock ICD not implementing buffer memory state.
    // Could be removed if https://github.com/KhronosGroup/Vulkan-Tools/issues/84 is fixed.
    if (renderer->isMockICDEnabled())
    {
        outRange->start = 0;
        outRange->end = 0;
        return angle::Result::Continue;
    }

    ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::getIndexRange");

    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, getSize(), GL_MAP_READ_BIT, &mapPtr));
    *outRange = gl::ComputeIndexRange(type, mapPtr, count, primitiveRestartEnabled);
    ANGLE_TRY(unmapImpl(contextVk));

    return angle::Result::Continue;
}

angle::Result BufferVk::updateBuffer(ContextVk *contextVk,
                                     size_t bufferSize,
                                     const BufferDataSource &dataSource,
                                     size_t updateSize,
                                     size_t updateOffset)
{
    // To copy on the CPU, the destination must be host-visible. The source should also be either
    // a CPU pointer or a host-visible buffer that is not being written to by the GPU.
    const bool shouldCopyOnCPU =
        mBuffer.isHostVisible() &&
        (dataSource.data != nullptr ||
         ShouldUseCPUToCopyData(contextVk, *dataSource.buffer, updateSize, bufferSize));

    if (shouldCopyOnCPU)
    {
        ANGLE_TRY(directUpdate(contextVk, dataSource, updateSize, updateOffset));
    }
    else
    {
        ANGLE_TRY(stagedUpdate(contextVk, dataSource, updateSize, updateOffset));
    }
    return angle::Result::Continue;
}

angle::Result BufferVk::directUpdate(ContextVk *contextVk,
                                     const BufferDataSource &dataSource,
                                     size_t size,
                                     size_t offset)
{
    RendererVk *renderer = contextVk->getRenderer();
    uint8_t *srcPointerMapped = nullptr;
    const uint8_t *srcPointer = nullptr;
    uint8_t *dstPointer = nullptr;

    // Map the destination buffer.
    ASSERT(mBuffer.isHostVisible());
    ANGLE_TRY(mBuffer.mapWithOffset(contextVk, &dstPointer, offset));
    ASSERT(dstPointer);

    // If the source data is coming from a buffer, map it. If this is a self-copy, avoid
    // double-mapping the buffer.
    if (dataSource.data != nullptr)
    {
        srcPointer = static_cast<const uint8_t *>(dataSource.data);
    }
    else
    {
        ANGLE_TRY(dataSource.buffer->mapWithOffset(contextVk, &srcPointerMapped,
                                                   static_cast<size_t>(dataSource.bufferOffset)));
        srcPointer = srcPointerMapped;
    }

    memcpy(dstPointer, srcPointer, size);

    // Unmap the destination and source buffers if applicable.
    //
    // If the buffer has dynamic usage then the intent is frequent client-side updates to the
    // buffer. Don't CPU-unmap the buffer; we will take care of unmapping when releasing the
    // buffer to either the renderer or mBufferFreeList.
    if (GetBufferUsageType(mState.getUsage()) == BufferUsageType::Static)
    {
        mBuffer.unmap(renderer);
    }
    ASSERT(mBuffer.isCoherent());

    if (srcPointerMapped != nullptr)
    {
        dataSource.buffer->unmap(renderer);
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::stagedUpdate(ContextVk *contextVk,
                                     const BufferDataSource &dataSource,
                                     size_t size,
                                     size_t offset)
{
    // If data is coming from a CPU pointer, stage it in a temporary staging buffer.
    // Otherwise, do a GPU copy directly from the given buffer.
    if (dataSource.data != nullptr)
    {
        uint8_t *mapPointer = nullptr;
        ANGLE_TRY(
            allocStagingBuffer(contextVk, vk::MemoryCoherency::NonCoherent, size, &mapPointer));
        memcpy(mapPointer, dataSource.data, size);
        ANGLE_TRY(flushStagingBuffer(contextVk, offset, size));
        mIsStagingBufferMapped = false;
    }
    else
    {
        // Check for self-dependency.
        vk::CommandBufferAccess access;
        if (dataSource.buffer->getBufferSerial() == mBuffer.getBufferSerial())
        {
            access.onBufferSelfCopy(&mBuffer);
        }
        else
        {
            access.onBufferTransferRead(dataSource.buffer);
            access.onBufferTransferWrite(&mBuffer);
        }

        vk::OutsideRenderPassCommandBuffer *commandBuffer;
        ANGLE_TRY(contextVk->getOutsideRenderPassCommandBuffer(access, &commandBuffer));

        // Enqueue a copy command on the GPU.
        const VkBufferCopy copyRegion = {dataSource.bufferOffset + dataSource.buffer->getOffset(),
                                         static_cast<VkDeviceSize>(offset) + mBuffer.getOffset(),
                                         static_cast<VkDeviceSize>(size)};

        commandBuffer->copyBuffer(dataSource.buffer->getBuffer(), mBuffer.getBuffer(), 1,
                                  &copyRegion);
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::acquireAndUpdate(ContextVk *contextVk,
                                         size_t bufferSize,
                                         const BufferDataSource &dataSource,
                                         size_t updateSize,
                                         size_t updateOffset,
                                         BufferUpdateType updateType)
{
    // We shouldn't get here if this is external memory.
    ASSERT(!isExternalBuffer());
    // If StorageRedefined, we cannot use mState.getSize() to allocate a new buffer.
    ASSERT(updateType != BufferUpdateType::StorageRedefined);
    ASSERT(mBuffer.valid());
    ASSERT(mBuffer.getSize() >= bufferSize);

    // Here we acquire a new BufferHelper and directUpdate() the new buffer.
    // If the subData size was less than the buffer's size we additionally enqueue
    // a GPU copy of the remaining regions from the old mBuffer to the new one.
    vk::BufferHelper prevBuffer;
    size_t offsetAfterSubdata = (updateOffset + updateSize);
    bool updateRegionBeforeSubData = mHasValidData && (updateOffset > 0);
    bool updateRegionAfterSubData = mHasValidData && (offsetAfterSubdata < bufferSize);

    uint8_t *prevMapPtrBeforeSubData = nullptr;
    uint8_t *prevMapPtrAfterSubData = nullptr;
    if (updateRegionBeforeSubData || updateRegionAfterSubData)
    {
        prevBuffer = std::move(mBuffer);

        // The total bytes that we need to copy from the old buffer to the new buffer.
        size_t copySize = bufferSize - updateSize;

        // If the buffer is host-visible and the GPU is not writing to it, we use the CPU to do
        // the copy. We need to save the source buffer pointer before we acquire a new buffer.
        if (ShouldUseCPUToCopyData(contextVk, prevBuffer, copySize, bufferSize))
        {
            uint8_t *mapPointer = nullptr;
            // prevBuffer will be recycled (or released and unmapped) by acquireBufferHelper.
            ANGLE_TRY(prevBuffer.map(contextVk, &mapPointer));
            ASSERT(mapPointer);
            prevMapPtrBeforeSubData = mapPointer;
            prevMapPtrAfterSubData = mapPointer + offsetAfterSubdata;
        }
    }

    ANGLE_TRY(acquireBufferHelper(contextVk, bufferSize, BufferUsageType::Dynamic));
    ANGLE_TRY(updateBuffer(contextVk, bufferSize, dataSource, updateSize, updateOffset));

    constexpr int kMaxCopyRegions = 2;
    angle::FixedVector<VkBufferCopy, kMaxCopyRegions> copyRegions;

    if (updateRegionBeforeSubData)
    {
        if (prevMapPtrBeforeSubData)
        {
            BufferDataSource beforeSrc = {};
            beforeSrc.data = prevMapPtrBeforeSubData;

            ANGLE_TRY(directUpdate(contextVk, beforeSrc, updateOffset, 0));
        }
        else
        {
            copyRegions.push_back({prevBuffer.getOffset(), mBuffer.getOffset(), updateOffset});
        }
    }

    if (updateRegionAfterSubData)
    {
        size_t copySize = bufferSize - offsetAfterSubdata;
        if (prevMapPtrAfterSubData)
        {
            BufferDataSource afterSrc = {};
            afterSrc.data = prevMapPtrAfterSubData;

            ANGLE_TRY(directUpdate(contextVk, afterSrc, copySize, offsetAfterSubdata));
        }
        else
        {
            copyRegions.push_back({prevBuffer.getOffset() + offsetAfterSubdata,
                                   mBuffer.getOffset() + offsetAfterSubdata, copySize});
        }
    }

    if (!copyRegions.empty())
    {
        ANGLE_TRY(mBuffer.copyFromBuffer(
            contextVk, &prevBuffer, static_cast<uint32_t>(copyRegions.size()), copyRegions.data()));
    }

    if (prevBuffer.valid())
    {
        prevBuffer.releaseBufferAndDescriptorSetCache(contextVk);
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::setDataImpl(ContextVk *contextVk,
                                    size_t bufferSize,
                                    const BufferDataSource &dataSource,
                                    size_t updateSize,
                                    size_t updateOffset,
                                    BufferUpdateType updateType)
{
    // if the buffer is currently in use
    //     if it isn't an external buffer, is not a self-copy, and the sub data size meets the
    //     threshold
    //         acquire a new BufferHelper from the pool
    //     else
    //         stage the update
    // else
    //     update the buffer directly
    if (isCurrentlyInUse(contextVk->getRenderer()))
    {
        // The acquire-and-update path creates a new buffer, which is sometimes more efficient
        // than trying to update the existing one. Firstly, this is not done in the following
        // situations:
        //
        // - For external buffers, the underlying storage cannot be reallocated.
        // - If storage has just been redefined, this path is not taken because a new buffer has
        //   already been created by the caller. Besides, this path uses mState.getSize(), which
        //   the frontend updates only after this call in situations where the storage may be
        //   redefined. This could happen if the buffer memory is DEVICE_LOCAL and
        //   renderer->getFeatures().allocateNonZeroMemory.enabled is true. In this case a
        //   copyToBuffer is immediately issued after allocation and isCurrentlyInUse will be
        //   true.
        // - If this is a self-copy through glCopyBufferSubData, |dataSource| will contain a
        //   reference to |mBuffer|, in which case the source information is lost after acquiring
        //   a new buffer.
        //
        // Additionally, this path is taken only if at least one of the following conditions is
        // true:
        //
        // - BufferVk does not have any valid data. This means that there is no data to be copied
        //   from the old buffer to the new one after acquiring it. This could happen when the
        //   application calls glBufferData with the same size and we reuse the existing buffer
        //   storage.
        // - The buffer is used read-only in the current render pass. In this case, acquiring a
        //   new buffer is preferred to avoid breaking the render pass.
        // - The update modifies a significant portion of the buffer.
        // - The preferCPUForBufferSubData feature is enabled.
        //
        const bool canAcquireAndUpdate = !isExternalBuffer() &&
                                         updateType != BufferUpdateType::StorageRedefined &&
                                         !IsSelfCopy(dataSource, mBuffer);
        if (canAcquireAndUpdate &&
            (!mHasValidData || ShouldAvoidRenderPassBreakOnUpdate(contextVk, mBuffer, bufferSize) ||
             ShouldAllocateNewMemoryForUpdate(contextVk, updateSize, bufferSize)))
        {
            ANGLE_TRY(acquireAndUpdate(contextVk, bufferSize, dataSource, updateSize, updateOffset,
                                       updateType));
        }
        else
        {
            if (canAcquireAndUpdate && RenderPassUsesBufferForReadOnly(contextVk, mBuffer))
            {
                ANGLE_VK_PERF_WARNING(contextVk, GL_DEBUG_SEVERITY_LOW,
                                      "Breaking the render pass on small upload to large buffer");
            }

            ANGLE_TRY(stagedUpdate(contextVk, dataSource, updateSize, updateOffset));
        }
    }
    else
    {
        ANGLE_TRY(updateBuffer(contextVk, bufferSize, dataSource, updateSize, updateOffset));
    }

    // Update conversions.
    dataUpdated();

    return angle::Result::Continue;
}

ConversionBuffer *BufferVk::getVertexConversionBuffer(RendererVk *renderer,
                                                      angle::FormatID formatID,
                                                      GLuint stride,
                                                      size_t offset,
                                                      bool hostVisible)
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        if (buffer.formatID == formatID && buffer.stride == stride && buffer.offset == offset)
        {
            ASSERT(buffer.data && buffer.data->valid());
            return &buffer;
        }
    }

    mVertexConversionBuffers.emplace_back(renderer, formatID, stride, offset, hostVisible);
    return &mVertexConversionBuffers.back();
}

void BufferVk::dataUpdated()
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.dirty = true;
    }
    // Now we have valid data.
    mHasValidData = true;
}

void BufferVk::onDataChanged()
{
    dataUpdated();
}

angle::Result BufferVk::acquireBufferHelper(ContextVk *contextVk,
                                            size_t sizeInBytes,
                                            BufferUsageType usageType)
{
    RendererVk *renderer = contextVk->getRenderer();
    size_t size = roundUpPow2(sizeInBytes, kBufferSizeGranularity);
    size_t alignment = renderer->getDefaultBufferAlignment();

    if (mBuffer.valid())
    {
        mBuffer.releaseBufferAndDescriptorSetCache(contextVk);
    }

    // Allocate the buffer directly.
    ANGLE_TRY(mBuffer.initSuballocation(contextVk, mMemoryTypeIndex, size, alignment, usageType));

    // Tell the observers (front end) that a new buffer was created, so the necessary
    // dirty bits can be set. This allows the buffer views pointing to the old buffer to
    // be recreated and point to the new buffer, along with updating the descriptor sets
    // to use the new buffer.
    onStateChange(angle::SubjectMessage::InternalMemoryAllocationChanged);

    return angle::Result::Continue;
}

bool BufferVk::isCurrentlyInUse(RendererVk *renderer) const
{
    return !renderer->hasResourceUseFinished(mBuffer.getResourceUse());
}

// When a buffer is being completely changed, calculate whether it's better to allocate a new
// buffer or overwrite the existing one.
BufferUpdateType BufferVk::calculateBufferUpdateTypeOnFullUpdate(
    RendererVk *renderer,
    size_t size,
    VkMemoryPropertyFlags memoryPropertyFlags,
    BufferUsageType usageType,
    const void *data) const
{
    // 0-sized updates should be no-op'd before this call.
    ASSERT(size > 0);

    // If there is no existing buffer, this cannot be a content update.
    if (!mBuffer.valid())
    {
        return BufferUpdateType::StorageRedefined;
    }

    const bool inUseAndRespecifiedWithoutData = data == nullptr && isCurrentlyInUse(renderer);
    bool redefineStorage = shouldRedefineStorage(renderer, usageType, memoryPropertyFlags, size);

    // Create a new buffer if the buffer is busy and it's being redefined without data.
    // Additionally, a new buffer is created if any of the parameters change (memory type, usage,
    // size).
    return redefineStorage || inUseAndRespecifiedWithoutData ? BufferUpdateType::StorageRedefined
                                                             : BufferUpdateType::ContentsUpdate;
}

bool BufferVk::shouldRedefineStorage(RendererVk *renderer,
                                     BufferUsageType usageType,
                                     VkMemoryPropertyFlags memoryPropertyFlags,
                                     size_t size) const
{
    if (mUsageType != usageType)
    {
        return true;
    }

    if (mMemoryPropertyFlags != memoryPropertyFlags)
    {
        return true;
    }

    if (size > mBuffer.getSize())
    {
        return true;
    }
    else
    {
        size_t alignment = renderer->getDefaultBufferAlignment();
        size_t sizeInBytes = roundUpPow2(size, kBufferSizeGranularity);
        size_t alignedSize = roundUp(sizeInBytes, alignment);
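        // Note that a smaller requested size also triggers reallocation when the aligned size no
        // longer matches the current allocation size.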
        if (alignedSize != mBuffer.getSize())
        {
            return true;
        }
    }

    return false;
}
}  // namespace rx