OccamRazor committed on
Commit
55088d3
·
1 Parent(s): 77ff985

Vulkan: Default to 1GB allocations instead of 4GB to avoid fragmentation and driver issues (llama/12434)

Browse files
ggml/src/ggml-vulkan/ggml-vulkan.cpp CHANGED
@@ -2524,13 +2524,9 @@ static vk_device ggml_vk_get_device(size_t idx) {
2524
 
2525
  if (GGML_VK_SUBALLOCATION_BLOCK_SIZE != nullptr) {
2526
  device->suballocation_block_size = std::stoul(GGML_VK_SUBALLOCATION_BLOCK_SIZE);
2527
- #if defined(_WIN32)
2528
- } else if (device->vendor_id == VK_VENDOR_ID_NVIDIA) {
2529
  // Limit batching of allocations to 1GB by default to avoid fragmentation issues
2530
  device->suballocation_block_size = 1024*1024*1024;
2531
- #endif
2532
- } else {
2533
- device->suballocation_block_size = device->max_memory_allocation_size;
2534
  }
2535
  device->suballocation_block_size = std::min(device->suballocation_block_size, device->max_memory_allocation_size);
2536
 
 
2524
 
2525
  if (GGML_VK_SUBALLOCATION_BLOCK_SIZE != nullptr) {
2526
  device->suballocation_block_size = std::stoul(GGML_VK_SUBALLOCATION_BLOCK_SIZE);
2527
+ } else {
 
2528
  // Limit batching of allocations to 1GB by default to avoid fragmentation issues
2529
  device->suballocation_block_size = 1024*1024*1024;
 
 
 
2530
  }
2531
  device->suballocation_block_size = std::min(device->suballocation_block_size, device->max_memory_allocation_size);
2532