/* Copyright (c) 2011 Wildfire Games
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * virtual memory interface. supersedes POSIX mmap; provides support for
 * large pages, autocommit, and specifying protection flags during allocation.
 */

#include "precompiled.h"
#include "lib/sysdep/vm.h"

#include "lib/sysdep/os/win/wutil.h"

#include <excpt.h>

#include "lib/timer.h"
#include "lib/bits.h"       // round_down
#include "lib/alignment.h"  // CACHE_ALIGNED
#include "lib/module_init.h"
#include "lib/sysdep/cpu.h" // cpu_AtomicAdd
#include "lib/sysdep/numa.h"
#include "lib/sysdep/arch/x86_x64/x86_x64.h"    // x86_x64::ApicId
#include "lib/sysdep/arch/x86_x64/apic.h"       // ProcessorFromApicId
#include "lib/sysdep/os/win/wversion.h"
#include "lib/sysdep/os/win/winit.h"

WINIT_REGISTER_CRITICAL_INIT(wvm_Init);


//-----------------------------------------------------------------------------
// functions not supported by 32-bit Windows XP

static WUTIL_FUNC(pGetCurrentProcessorNumber, DWORD, (VOID));
static WUTIL_FUNC(pGetNumaProcessorNode, BOOL, (UCHAR, PUCHAR));
static WUTIL_FUNC(pVirtualAllocExNuma, LPVOID, (HANDLE, LPVOID, SIZE_T, DWORD, DWORD, DWORD));

static DWORD WINAPI EmulateGetCurrentProcessorNumber(VOID)
{
    const ApicId apicId = GetApicId();
    const DWORD processor = (DWORD)ProcessorFromApicId(apicId);
    ASSERT(processor < os_cpu_MaxProcessors);
    return processor;
}

static BOOL WINAPI EmulateGetNumaProcessorNode(UCHAR UNUSED(processor), PUCHAR node)
{
    // given that the system doesn't support GetNumaProcessorNode,
    // it will also lack VirtualAllocExNuma, so the node value we assign
    // is ignored by EmulateVirtualAllocExNuma.
    *node = 0;
    return TRUE;
}

static LPVOID WINAPI EmulateVirtualAllocExNuma(HANDLE UNUSED(hProcess), LPVOID p, SIZE_T size, DWORD allocationType, DWORD protect, DWORD UNUSED(node))
{
    return VirtualAlloc(p, size, allocationType, protect);
}

static Status wvm_Init()
{
    WUTIL_IMPORT_KERNEL32(GetCurrentProcessorNumber, pGetCurrentProcessorNumber);
    WUTIL_IMPORT_KERNEL32(GetNumaProcessorNode, pGetNumaProcessorNode);
    WUTIL_IMPORT_KERNEL32(VirtualAllocExNuma, pVirtualAllocExNuma);

    if(!pGetCurrentProcessorNumber)
        pGetCurrentProcessorNumber = &EmulateGetCurrentProcessorNumber;
    if(!pGetNumaProcessorNode)
        pGetNumaProcessorNode = &EmulateGetNumaProcessorNode;
    if(!pVirtualAllocExNuma)
        pVirtualAllocExNuma = &EmulateVirtualAllocExNuma;

    return INFO::OK;
}


namespace vm {


//-----------------------------------------------------------------------------
// per-processor statistics
// (alignment avoids false sharing)

CACHE_ALIGNED(struct Statistics)    // POD
{
    // thread-safe (required due to concurrent commits)
    void NotifyLargePageCommit()
    {
        cpu_AtomicAdd(&largePageCommits, +1);
    }

    void NotifySmallPageCommit()
    {
        cpu_AtomicAdd(&smallPageCommits, +1);
    }

    intptr_t largePageCommits;
    intptr_t smallPageCommits;
};

static CACHE_ALIGNED(Statistics) statistics[os_cpu_MaxProcessors];

void DumpStatistics()
{
    ENSURE(IsAligned(&statistics[0], cacheLineSize));
    ENSURE(IsAligned(&statistics[1], cacheLineSize));

    size_t smallPageCommits = 0;
    size_t largePageCommits = 0;
    uintptr_t processorsWithNoCommits = 0;
    for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
    {
        const Statistics& s = statistics[processor];
        if(s.smallPageCommits == 0 && s.largePageCommits == 0)
            processorsWithNoCommits |= Bit(processor);
        smallPageCommits += s.smallPageCommits;
        largePageCommits += s.largePageCommits;
    }

    const size_t totalCommits = smallPageCommits+largePageCommits;
    if(totalCommits == 0)   // this module wasn't used => don't print debug output
        return;
    const size_t largePageRatio = totalCommits? largePageCommits*100/totalCommits : 0;
    debug_printf("%d commits (%d, i.e. %d%% of them via large pages)\n", totalCommits, largePageCommits, largePageRatio);

    if(processorsWithNoCommits != 0)
        debug_printf(" processors with no commits: %x\n", processorsWithNoCommits);

    if(numa_NumNodes() > 1)
        debug_printf("NUMA factor: %.2f\n", numa_Factor());
}


//-----------------------------------------------------------------------------
// allocator with large-page and NUMA support

static bool largePageAllocationTookTooLong = false;

static bool ShouldUseLargePages(size_t allocationSize, DWORD allocationType, PageType pageType)
{
    // don't even check for large page support.
    if(pageType == kSmall)
        return false;

    // can't use large pages when reserving - VirtualAlloc would fail with
    // ERROR_INVALID_PARAMETER.
    if((allocationType & MEM_COMMIT) == 0)
        return false;

    // OS lacks support for large pages.
    if(os_cpu_LargePageSize() == 0)
        return false;

    // large pages are available and application wants them used.
    if(pageType == kLarge)
        return true;

    // default: use a heuristic.
    {
        // internal fragmentation would be excessive.
        if(allocationSize <= largePageSize/2)
            return false;

        // a previous attempt already took too long.
        if(largePageAllocationTookTooLong)
            return false;

        // pre-Vista Windows OSes attempt to cope with page fragmentation by
        // trimming the working set of all processes, thus swapping them out,
        // and waiting for contiguous regions to appear. this is terribly
        // slow (multiple seconds), hence the following heuristic:
        if(wversion_Number() < WVERSION_VISTA)
        {
            // if there's not plenty of free memory, then memory is surely
            // already fragmented.
            if(os_cpu_MemoryAvailable() < 2000)   // 2 GB
                return false;
        }
    }

    return true;
}
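
// illustrative instance of the default heuristic above (assuming the usual
// 2 MiB large page size): a kDefault commit of 1 MiB or less always uses
// small pages to limit internal fragmentation, whereas e.g. an 8 MiB commit
// may be attempted with large pages - unless an earlier attempt exceeded the
// 0.5 s limit enforced in AllocateLargeOrSmallPages below.
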
// used for reserving address space, committing pages, or both.
static void* AllocateLargeOrSmallPages(uintptr_t address, size_t size, DWORD allocationType, PageType pageType = kDefault, int prot = PROT_READ|PROT_WRITE)
{
    const HANDLE hProcess = GetCurrentProcess();
    const DWORD protect = MemoryProtectionFromPosix(prot);

    UCHAR node;
    const DWORD processor = pGetCurrentProcessorNumber();
    WARN_IF_FALSE(pGetNumaProcessorNode((UCHAR)processor, &node));

    if(ShouldUseLargePages(size, allocationType, pageType))
    {
        // MEM_LARGE_PAGES requires aligned addresses and sizes
        const size_t largePageSize = os_cpu_LargePageSize();
        const uintptr_t alignedAddress = round_down(address, largePageSize);
        const size_t alignedSize = round_up(size+largePageSize-1, largePageSize);

        // note: this call can take SECONDS, which is why several checks are
        // undertaken before we even try. these aren't authoritative, so we
        // at least prevent future attempts if it takes too long.
        const double startTime = timer_Time(); COMPILER_FENCE;
        void* largePages = pVirtualAllocExNuma(hProcess, LPVOID(alignedAddress), alignedSize, allocationType|MEM_LARGE_PAGES, protect, node);
        const double elapsedTime = timer_Time() - startTime; COMPILER_FENCE;
        if(elapsedTime > 0.5)
            largePageAllocationTookTooLong = true;   // avoid large pages next time

        if(largePages)
        {
            if((allocationType & MEM_COMMIT) != 0)
                statistics[processor].NotifyLargePageCommit();
            return largePages;
        }
    }

    // try (again) with regular pages
    void* smallPages = pVirtualAllocExNuma(hProcess, LPVOID(address), size, allocationType, protect, node);
    if(smallPages)
    {
        if((allocationType & MEM_COMMIT) != 0)
            statistics[processor].NotifySmallPageCommit();
        return smallPages;
    }
    else
    {
        MEMORY_BASIC_INFORMATION mbi = {0};
        (void)VirtualQuery(LPCVOID(address), &mbi, sizeof(mbi));   // return value is #bytes written in mbi
        debug_printf("Allocation failed: base=%p allocBase=%p allocProt=%d size=%d state=%d prot=%d type=%d\n", mbi.BaseAddress, mbi.AllocationBase, mbi.AllocationProtect, mbi.RegionSize, mbi.State, mbi.Protect, mbi.Type);
    }

    return 0;
}


//-----------------------------------------------------------------------------
// address space reservation

// indicates the extent of a range of address space,
// and the parameters for committing large/small pages in it.
//
// this bookkeeping information increases the safety of on-demand commits,
// enables different parameters for separate allocations, and allows
// variable alignment because it retains the original base address.
// (storing this information within the allocated memory would
// require mapping an additional page and may waste an entire
// large page if the base address happens to be aligned already.)
CACHE_ALIGNED(struct AddressRangeDescriptor)    // POD
{
    // attempt to activate this descriptor and reserve address space.
    // side effect: initializes all fields if successful.
    //
    // @param size, commitSize, pageType, prot - see ReserveAddressSpace.
    // @return INFO::SKIPPED if this descriptor is already in use,
    //   INFO::OK on success, otherwise ERR::NO_MEM (after showing an
    //   error message).
    Status Allocate(size_t size, size_t commitSize, PageType pageType, int prot)
    {
        // if this descriptor wasn't yet in use, mark it as busy
        // (double-checking is cheaper than cpu_CAS)
        if(base != 0 || !cpu_CAS(&base, intptr_t(0), intptr_t(this)))
            return INFO::SKIPPED;

        ENSURE(size != 0);  // probably indicates a bug in caller
        ENSURE((commitSize % largePageSize) == 0 || pageType == kSmall);
        ASSERT(pageType == kLarge || pageType == kSmall || pageType == kDefault);
        ASSERT(prot == PROT_NONE || (prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) == 0);
        this->commitSize = commitSize;
        this->pageType = pageType;
        this->prot = prot;

        alignment = (pageType == kSmall)? pageSize : largePageSize;
        totalSize = round_up(size+alignment-1, alignment);

        // NB: it is meaningless to ask for large pages when reserving
        // (see ShouldUseLargePages). pageType only affects subsequent commits.
        base = (intptr_t)AllocateLargeOrSmallPages(0, totalSize, MEM_RESERVE);
        if(!base)
        {
            debug_printf("AllocateLargeOrSmallPages of %lld failed\n", (u64)totalSize);
            DEBUG_DISPLAY_ERROR(ErrorString());
            return ERR::NO_MEM;   // NOWARN (error string is more helpful)
        }

        alignedBase = round_up(uintptr_t(base), alignment);
        alignedEnd = alignedBase + round_up(size, alignment);
        return INFO::OK;
    }

    void Free()
    {
        vm::Free((void*)base, totalSize);
        alignment = alignedBase = alignedEnd = 0;
        totalSize = 0;
        COMPILER_FENCE;
        base = 0;   // release descriptor for subsequent reuse
    }

    bool Contains(uintptr_t address) const
    {
        // safety check: we should never see pointers in the no-man's-land
        // between the original and rounded up base addresses.
        ENSURE(!(uintptr_t(base) <= address && address < alignedBase));

        return (alignedBase <= address && address < alignedEnd);
    }

    bool Commit(uintptr_t address)
    {
        // (safe because Allocate rounded up to alignment)
        const uintptr_t alignedAddress = round_down(address, alignment);
        ENSURE(alignedBase <= alignedAddress && alignedAddress+commitSize <= alignedEnd);
        return vm::Commit(alignedAddress, commitSize, pageType, prot);
    }

    // corresponds to the respective page size (Windows requires
    // naturally aligned addresses and sizes when committing large pages).
    // note that VirtualAlloc's alignment defaults to 64 KiB.
    uintptr_t alignment;

    uintptr_t alignedBase;  // multiple of alignment
    uintptr_t alignedEnd;   // "

    // (actual requested size / allocated address is required by
    // ReleaseAddressSpace due to variable alignment.)
    volatile intptr_t base; // (type is dictated by cpu_CAS)
    size_t totalSize;

    // parameters to be relayed to vm::Commit
    size_t commitSize;
    PageType pageType;
    int prot;

//private:
    static const wchar_t* ErrorString()
    {
#if ARCH_IA32
        return L"Out of address space (64-bit OS may help)";
#elif OS_WIN
        // because early AMD64 lacked CMPXCHG16B, the Windows lock-free slist
        // must squeeze the address, ABA tag and list length (a questionable
        // design decision) into 64 bits. that leaves 39 bits for the
        // address, plus 4 implied zero bits due to 16-byte alignment.
        // [http://www.alex-ionescu.com/?p=50]
        return L"Out of address space (Windows only provides 8 TiB)";
#else
        return L"Out of address space";
#endif
    }
};
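
// worked example of the bookkeeping above (hypothetical numbers, assuming
// largePageSize is 2 MiB): for a 5 MiB reservation with pageType == kDefault,
// alignment = 2 MiB and totalSize = 8 MiB. if the reservation yields
// base = 0x10100000, then alignedBase = 0x10200000 and
// alignedEnd = 0x10200000 + 6 MiB = 0x10800000; Contains() accepts only
// addresses in [alignedBase, alignedEnd).
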
// (array size governs the max. number of extant allocations)
static AddressRangeDescriptor ranges[2*os_cpu_MaxProcessors];

static AddressRangeDescriptor* FindDescriptor(uintptr_t address)
{
    for(size_t idxRange = 0; idxRange < ARRAY_SIZE(ranges); idxRange++)
    {
        AddressRangeDescriptor& d = ranges[idxRange];
        if(d.Contains(address))
            return &d;
    }

    return 0;   // not contained in any allocated ranges
}


void* ReserveAddressSpace(size_t size, size_t commitSize, PageType pageType, int prot)
{
    for(size_t idxRange = 0; idxRange < ARRAY_SIZE(ranges); idxRange++)
    {
        Status ret = ranges[idxRange].Allocate(size, commitSize, pageType, prot);
        if(ret == INFO::OK)
            return (void*)ranges[idxRange].alignedBase;
        if(ret == ERR::NO_MEM)
            return 0;
        // else: descriptor already in use, try the next one
    }

    // all descriptors are in use; ranges[] was too small
    DEBUG_WARN_ERR(ERR::LIMIT);
    return 0;
}

void ReleaseAddressSpace(void* p, size_t UNUSED(size))
{
    // it is customary to ignore null pointers
    if(!p)
        return;

    AddressRangeDescriptor* d = FindDescriptor(uintptr_t(p));
    if(d)
        d->Free();
    else
    {
        debug_printf("No AddressRangeDescriptor contains %P\n", p);
        ENSURE(0);
    }
}


//-----------------------------------------------------------------------------
// commit/decommit, allocate/free, protect

TIMER_ADD_CLIENT(tc_commit);

bool Commit(uintptr_t address, size_t size, PageType pageType, int prot)
{
    TIMER_ACCRUE_ATOMIC(tc_commit);

    return AllocateLargeOrSmallPages(address, size, MEM_COMMIT, pageType, prot) != 0;
}

bool Decommit(uintptr_t address, size_t size)
{
    return VirtualFree(LPVOID(address), size, MEM_DECOMMIT) != FALSE;
}

bool Protect(uintptr_t address, size_t size, int prot)
{
    const DWORD protect = MemoryProtectionFromPosix(prot);
    DWORD oldProtect;   // required by VirtualProtect
    const BOOL ok = VirtualProtect(LPVOID(address), size, protect, &oldProtect);
    return ok != FALSE;
}

void* Allocate(size_t size, PageType pageType, int prot)
{
    return AllocateLargeOrSmallPages(0, size, MEM_RESERVE|MEM_COMMIT, pageType, prot);
}

void Free(void* p, size_t UNUSED(size))
{
    if(p)   // otherwise, VirtualFree complains
    {
        const BOOL ok = VirtualFree(p, 0, MEM_RELEASE);
        WARN_IF_FALSE(ok);
    }
}
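
// illustrative caller-side sketch of the simple allocation interface (not
// part of this module; the size, protection flags and the vm::kDefault
// qualification are assumptions made for the example):
//
//   void* buffer = vm::Allocate(16*MiB, vm::kDefault, PROT_READ|PROT_WRITE);
//   if(buffer)
//   {
//       // ... use the fully committed buffer ...
//       vm::Free(buffer, 16*MiB);
//   }
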
//-----------------------------------------------------------------------------
// on-demand commit

// NB: avoid using debug_printf here because OutputDebugString has been
// observed to generate vectored exceptions when running outside the IDE.
static LONG CALLBACK VectoredHandler(const PEXCEPTION_POINTERS ep)
{
    const PEXCEPTION_RECORD er = ep->ExceptionRecord;

    // we only want to handle access violations. (strictly speaking,
    // unmapped memory causes page faults, but Windows reports them
    // with EXCEPTION_ACCESS_VIOLATION.)
    if(er->ExceptionCode != EXCEPTION_ACCESS_VIOLATION)
        return EXCEPTION_CONTINUE_SEARCH;

    // NB: read exceptions are legitimate and occur when updating an
    // accumulator for the first time.

    // get the source/destination of the read/write operation that
    // failed. (NB: don't use er->ExceptionAddress - that's the
    // location of the code that encountered the fault)
    const uintptr_t address = (uintptr_t)er->ExceptionInformation[1];

    // if unknown (e.g. access violation in kernel address space or
    // violation of alignment requirements), we don't want to handle it.
    if(address == ~uintptr_t(0))
        return EXCEPTION_CONTINUE_SEARCH;

    // the address space must have been allocated by ReserveAddressSpace
    // (otherwise we wouldn't know the desired commitSize/pageType/prot).
    AddressRangeDescriptor* d = FindDescriptor(address);
    if(!d)
        return EXCEPTION_CONTINUE_SEARCH;

    // NB: the first access to a page isn't necessarily at offset 0
    // (memcpy isn't guaranteed to copy sequentially). rounding down
    // is safe and necessary - see AddressRangeDescriptor::alignment.
    const uintptr_t alignedAddress = round_down(address, d->alignment);
    bool ok = d->Commit(alignedAddress);
    if(!ok)
    {
        debug_printf("VectoredHandler: Commit(0x%p) failed; address=0x%p\n", alignedAddress, address);
        ENSURE(0);
        return EXCEPTION_CONTINUE_SEARCH;
    }

    // continue at (i.e. retry) the same instruction.
    return EXCEPTION_CONTINUE_EXECUTION;
}


static PVOID handler;
static ModuleInitState initState;
static volatile intptr_t references = 0;   // atomic

static Status InitHandler()
{
    ENSURE(handler == 0);
    handler = AddVectoredExceptionHandler(TRUE, VectoredHandler);
    ENSURE(handler != 0);
    return INFO::OK;
}

static void ShutdownHandler()
{
    ENSURE(handler != 0);
    const ULONG ret = RemoveVectoredExceptionHandler(handler);
    ENSURE(ret != 0);
    handler = 0;
}

void BeginOnDemandCommits()
{
    ModuleInit(&initState, InitHandler);
    cpu_AtomicAdd(&references, +1);
}

void EndOnDemandCommits()
{
    if(cpu_AtomicAdd(&references, -1) == 1)
        ModuleShutdown(&initState, ShutdownHandler);
}

}   // namespace vm
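
// illustrative end-to-end sketch of on-demand commits (caller-side code, not
// part of this module; sizes, names and the vm::kDefault qualification are
// assumptions made for the example). the vectored handler installed by
// BeginOnDemandCommits commits pages the first time they are touched:
//
//   const size_t size = 256*MiB;      // address space to reserve
//   const size_t commitSize = 2*MiB;  // granularity of on-demand commits
//   void* p = vm::ReserveAddressSpace(size, commitSize, vm::kDefault, PROT_READ|PROT_WRITE);
//   vm::BeginOnDemandCommits();       // installs VectoredHandler
//   memset(p, 0, 4096);               // first write faults => vm::Commit is called
//   vm::EndOnDemandCommits();         // uninstalls the handler when the last user is done
//   vm::ReleaseAddressSpace(p, size);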