263 lines
10 KiB
C++
263 lines
10 KiB
C++
|
|
// Copyright 2022 The Android Open Source Project
|
||
|
|
//
|
||
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
|
// you may not use this file except in compliance with the License.
|
||
|
|
// You may obtain a copy of the License at
|
||
|
|
//
|
||
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
//
|
||
|
|
// Unless required by applicable law or agreed to in writing, software
|
||
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
|
// See the License for the specific language governing permissions and
|
||
|
|
// limitations under the License.
|
||
|
|
|
||
|
|
#include "AstcTexture.h"
|
||
|
|
|
||
|
|
#include <atomic>
|
||
|
|
#include <chrono>
|
||
|
|
#include <cstring>
|
||
|
|
#include <optional>
|
||
|
|
#include <vector>
|
||
|
|
|
||
|
|
#include "aemu/base/HealthMonitor.h"
|
||
|
|
#include "host-common/logging.h"
|
||
|
|
#include "stream-servers/vulkan/vk_util.h"
|
||
|
|
|
||
|
|
namespace gfxstream {
|
||
|
|
namespace vk {
|
||
|
|
namespace {
|
||
|
|
|
||
|
|
using std::chrono::milliseconds;
|
||
|
|
|
||
|
|
// Print stats each time we decompress this many pixels:
|
||
|
|
constexpr uint64_t kProcessedPixelsLogInterval = 10'000'000;
|
||
|
|
|
||
|
|
std::atomic<uint64_t> pixels_processed = 0;
|
||
|
|
std::atomic<uint64_t> ms_elapsed = 0;
|
||
|
|
std::atomic<int64_t> bytes_used = 0;
|
||
|
|
|
||
|
|
uint32_t mipmapSize(uint32_t size, uint32_t mipLevel) {
|
||
|
|
return std::max<uint32_t>(size >> mipLevel, 1);
|
||
|
|
}
|
||
|
|
|
||
|
|
bool isRegionValid(const VkBufferImageCopy& region, uint32_t width, uint32_t height) {
|
||
|
|
// TODO(gregschlom) deal with those cases. See details at:
|
||
|
|
// https://registry.khronos.org/vulkan/specs/1.0-extensions/html/chap20.html#copies-buffers-images-addressing
|
||
|
|
// https://stackoverflow.com/questions/46501832/vulkan-vkbufferimagecopy-for-partial-transfer
|
||
|
|
|
||
|
|
if (region.bufferRowLength != 0 || region.bufferImageHeight != 0) {
|
||
|
|
WARN("ASTC CPU decompression skipped: non-packed buffer");
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
if (region.imageOffset.x != 0 || region.imageOffset.y != 0) {
|
||
|
|
WARN("ASTC CPU decompression skipped: imageOffset is non-zero");
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
if (region.imageExtent.width != width || region.imageExtent.height != height) {
|
||
|
|
WARN("ASTC CPU decompression skipped: imageExtent is less than the entire image");
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
} // namespace
|
||
|
|
|
||
|
|
AstcTexture::AstcTexture(VulkanDispatch* vk, VkDevice device, VkPhysicalDevice physicalDevice,
|
||
|
|
VkExtent3D imgSize, uint32_t blockWidth, uint32_t blockHeight,
|
||
|
|
AstcCpuDecompressor* decompressor)
|
||
|
|
: mVk(vk),
|
||
|
|
mDevice(device),
|
||
|
|
mPhysicalDevice(physicalDevice),
|
||
|
|
mImgSize(imgSize),
|
||
|
|
mBlockWidth(blockWidth),
|
||
|
|
mBlockHeight(blockHeight),
|
||
|
|
mDecompressor(decompressor) {}
|
||
|
|
|
||
|
|
AstcTexture::~AstcTexture() { destroyVkBuffer(); }
|
||
|
|
|
||
|
|
bool AstcTexture::canDecompressOnCpu() const { return mDecompressor->available(); }
|
||
|
|
|
||
|
|
uint8_t* AstcTexture::createVkBufferAndMapMemory(size_t bufferSize) {
|
||
|
|
VkResult res;
|
||
|
|
mBufferSize = bufferSize; // Save the buffer size, for statistics purpose only
|
||
|
|
bytes_used += bufferSize;
|
||
|
|
|
||
|
|
if (mDecompBuffer || mDecompBufferMemory) {
|
||
|
|
WARN(
|
||
|
|
"ASTC CPU decompression failed: tried to decompress same image more than once. Falling"
|
||
|
|
" back to GPU decompression");
|
||
|
|
return nullptr;
|
||
|
|
}
|
||
|
|
|
||
|
|
VkBufferCreateInfo bufferInfo = {
|
||
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||
|
|
.size = bufferSize,
|
||
|
|
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
||
|
|
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||
|
|
};
|
||
|
|
res = mVk->vkCreateBuffer(mDevice, &bufferInfo, nullptr, &mDecompBuffer);
|
||
|
|
if (res != VK_SUCCESS) {
|
||
|
|
WARN("ASTC CPU decompression: vkCreateBuffer failed: %d", res);
|
||
|
|
mDecompBuffer = VK_NULL_HANDLE;
|
||
|
|
return nullptr;
|
||
|
|
}
|
||
|
|
|
||
|
|
VkMemoryRequirements memRequirements;
|
||
|
|
mVk->vkGetBufferMemoryRequirements(mDevice, mDecompBuffer, &memRequirements);
|
||
|
|
|
||
|
|
std::optional<uint32_t> memIndex = vk_util::findMemoryType(
|
||
|
|
mVk, mPhysicalDevice, memRequirements.memoryTypeBits,
|
||
|
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||
|
|
VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
|
||
|
|
if (!memIndex) {
|
||
|
|
// Do it again, but without VK_MEMORY_PROPERTY_HOST_CACHED_BIT this time
|
||
|
|
memIndex = vk_util::findMemoryType(
|
||
|
|
mVk, mPhysicalDevice, memRequirements.memoryTypeBits,
|
||
|
|
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
|
||
|
|
}
|
||
|
|
if (!memIndex) {
|
||
|
|
WARN("ASTC CPU decompression: no suitable memory type to decompress the image");
|
||
|
|
return nullptr;
|
||
|
|
}
|
||
|
|
|
||
|
|
VkMemoryAllocateInfo allocInfo = {
|
||
|
|
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||
|
|
.allocationSize = memRequirements.size,
|
||
|
|
.memoryTypeIndex = *memIndex,
|
||
|
|
};
|
||
|
|
res = mVk->vkAllocateMemory(mDevice, &allocInfo, nullptr, &mDecompBufferMemory);
|
||
|
|
if (res != VK_SUCCESS) {
|
||
|
|
WARN("ASTC CPU decompression: vkAllocateMemory failed: %d", res);
|
||
|
|
mDecompBufferMemory = VK_NULL_HANDLE;
|
||
|
|
return nullptr;
|
||
|
|
}
|
||
|
|
|
||
|
|
res = mVk->vkBindBufferMemory(mDevice, mDecompBuffer, mDecompBufferMemory, 0);
|
||
|
|
if (res != VK_SUCCESS) {
|
||
|
|
WARN("ASTC CPU decompression: vkBindBufferMemory failed: %d", res);
|
||
|
|
return nullptr;
|
||
|
|
}
|
||
|
|
|
||
|
|
uint8_t* decompData;
|
||
|
|
res = mVk->vkMapMemory(mDevice, mDecompBufferMemory, 0, bufferSize, 0, (void**)&decompData);
|
||
|
|
if (res != VK_SUCCESS) {
|
||
|
|
WARN("ASTC CPU decompression: vkMapMemory failed: %d", res);
|
||
|
|
return nullptr;
|
||
|
|
}
|
||
|
|
|
||
|
|
return decompData;
|
||
|
|
}
|
||
|
|
|
||
|
|
void AstcTexture::destroyVkBuffer() {
|
||
|
|
bytes_used -= mBufferSize;
|
||
|
|
if (mVk && mDevice) {
|
||
|
|
mVk->vkDestroyBuffer(mDevice, mDecompBuffer, nullptr);
|
||
|
|
mVk->vkFreeMemory(mDevice, mDecompBufferMemory, nullptr);
|
||
|
|
mDecompBuffer = VK_NULL_HANDLE;
|
||
|
|
mDecompBufferMemory = VK_NULL_HANDLE;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
void AstcTexture::on_vkCmdCopyBufferToImage(VkCommandBuffer commandBuffer, uint8_t* srcAstcData,
|
||
|
|
size_t astcDataSize, VkImage dstImage,
|
||
|
|
VkImageLayout dstImageLayout, uint32_t regionCount,
|
||
|
|
const VkBufferImageCopy* pRegions,
|
||
|
|
const VkDecoderContext& context) {
|
||
|
|
auto watchdog =
|
||
|
|
WATCHDOG_BUILDER(context.healthMonitor, "AstcTexture::on_vkCmdCopyBufferToImage").build();
|
||
|
|
auto start_time = std::chrono::steady_clock::now();
|
||
|
|
mSuccess = false;
|
||
|
|
size_t decompSize = 0; // How many bytes we need to hold the decompressed data
|
||
|
|
|
||
|
|
// Holds extra data about the region
|
||
|
|
struct RegionInfo {
|
||
|
|
uint32_t width; // actual width (ie: mipmap width)
|
||
|
|
uint32_t height; // actual height (ie: mipmap height)
|
||
|
|
uint32_t compressedSize; // size of ASTC data for that region
|
||
|
|
};
|
||
|
|
|
||
|
|
std::vector<RegionInfo> regionInfos;
|
||
|
|
regionInfos.reserve(regionCount);
|
||
|
|
|
||
|
|
// Make a copy of the regions and update the buffer offset of each to reflect the
|
||
|
|
// correct location of the decompressed data
|
||
|
|
std::vector<VkBufferImageCopy> decompRegions(pRegions, pRegions + regionCount);
|
||
|
|
for (auto& decompRegion : decompRegions) {
|
||
|
|
const uint32_t mipLevel = decompRegion.imageSubresource.mipLevel;
|
||
|
|
const uint32_t width = mipmapSize(mImgSize.width, mipLevel);
|
||
|
|
const uint32_t height = mipmapSize(mImgSize.height, mipLevel);
|
||
|
|
const uint32_t numAstcBlocks = ((width + mBlockWidth - 1) / mBlockWidth) *
|
||
|
|
((height + mBlockHeight - 1) / mBlockHeight);
|
||
|
|
const uint32_t compressedSize = numAstcBlocks * 16;
|
||
|
|
// We haven't updated decompRegion.bufferOffset yet, so it's still the _compressed_ offset.
|
||
|
|
const uint32_t compressedDataOffset = decompRegion.bufferOffset;
|
||
|
|
|
||
|
|
// Do all the precondition checks
|
||
|
|
if (!isRegionValid(decompRegion, width, height)) return;
|
||
|
|
if (compressedDataOffset + compressedSize > astcDataSize) {
|
||
|
|
WARN("ASTC CPU decompression: data out of bounds. Offset: %llu, Size: %llu, Total %llu",
|
||
|
|
compressedDataOffset, compressedSize, astcDataSize);
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
decompRegion.bufferOffset = decompSize;
|
||
|
|
decompSize += width * height * 4;
|
||
|
|
regionInfos.push_back({width, height, compressedSize});
|
||
|
|
}
|
||
|
|
|
||
|
|
// Create a new VkBuffer to hold the decompressed data
|
||
|
|
uint8_t* decompData = createVkBufferAndMapMemory(decompSize);
|
||
|
|
if (!decompData) {
|
||
|
|
destroyVkBuffer(); // The destructor would have done it anyway, but may as well do it early
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Decompress each region
|
||
|
|
for (int i = 0; i < regionCount; i++) {
|
||
|
|
const auto& compRegion = pRegions[i];
|
||
|
|
const auto& decompRegion = decompRegions[i];
|
||
|
|
const auto& regionInfo = regionInfos[i];
|
||
|
|
|
||
|
|
int32_t status = mDecompressor->decompress(
|
||
|
|
regionInfo.width, regionInfo.height, mBlockWidth, mBlockHeight,
|
||
|
|
srcAstcData + compRegion.bufferOffset, regionInfo.compressedSize,
|
||
|
|
decompData + decompRegion.bufferOffset);
|
||
|
|
|
||
|
|
if (status != 0) {
|
||
|
|
WARN("ASTC CPU decompression failed: %s - will try compute shader instead.",
|
||
|
|
mDecompressor->getStatusString(status));
|
||
|
|
mVk->vkUnmapMemory(mDevice, mDecompBufferMemory);
|
||
|
|
destroyVkBuffer();
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
mVk->vkUnmapMemory(mDevice, mDecompBufferMemory);
|
||
|
|
|
||
|
|
// Finally, actually copy the buffer to the image
|
||
|
|
mVk->vkCmdCopyBufferToImage(commandBuffer, mDecompBuffer, dstImage, dstImageLayout,
|
||
|
|
decompRegions.size(), decompRegions.data());
|
||
|
|
|
||
|
|
mSuccess = true;
|
||
|
|
auto end_time = std::chrono::steady_clock::now();
|
||
|
|
|
||
|
|
// Compute stats
|
||
|
|
pixels_processed += decompSize / 4;
|
||
|
|
ms_elapsed += std::chrono::duration_cast<milliseconds>(end_time - start_time).count();
|
||
|
|
|
||
|
|
uint64_t total_pixels = pixels_processed.load();
|
||
|
|
uint64_t total_time = ms_elapsed.load();
|
||
|
|
|
||
|
|
if (total_pixels >= kProcessedPixelsLogInterval && total_time > 0) {
|
||
|
|
pixels_processed.store(0);
|
||
|
|
ms_elapsed.store(0);
|
||
|
|
INFO("ASTC CPU decompression: %.2f Mpix in %.2f seconds (%.2f Mpix/s). Total mem: %.2f MB",
|
||
|
|
total_pixels / 1'000'000.0, total_time / 1000.0,
|
||
|
|
(float)total_pixels / total_time / 1000.0, bytes_used / 1000000.0);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
} // namespace vk
|
||
|
|
} // namespace gfxstream
|