diff options
author | Tim Besard <tim.besard@gmail.com> | 2011-11-02 14:57:47 +0100 |
---|---|---|
committer | Tim Besard <tim.besard@gmail.com> | 2011-11-02 14:57:47 +0100 |
commit | 017f97abaf748a23314f4f52d45da454fd48591a (patch) | |
tree | edbd1f040539f0d99d275d12452bd2747d7f2e36 | |
parent | fa1c64acd5e0ad7bc9e549da09cf5be3c794b435 (diff) |
Switching to AsmJit generated chasing routines.
38 files changed, 41154 insertions, 618 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 777c437..c232e64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,9 @@ find_package(Threads) find_library(LIBNUMA numa) option(USE_LIBNUMA "Build against NUMA libraries" ON) +include_directories(lib) +add_subdirectory(lib/AsmJit) + # # Code compilation @@ -45,3 +48,4 @@ if (USE_LIBNUMA) message(STATUS "WARNING: libnuma not found, not compiling against it") endif () endif () +target_link_libraries(chase AsmJit) diff --git a/lib/AsmJit/ApiBegin.h b/lib/AsmJit/ApiBegin.h new file mode 100644 index 0000000..1f18ad2 --- /dev/null +++ b/lib/AsmJit/ApiBegin.h @@ -0,0 +1,43 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +// MSVC +#if defined(_MSC_VER) + +// Disable some warnings we know about +#pragma warning(push) +#pragma warning(disable: 4127) // conditional expression is constant +#pragma warning(disable: 4251) // struct needs to have dll-interface to be used + // by clients of struct ... +#pragma warning(disable: 4275) // non dll-interface struct ... used as base for + // dll-interface struct +#pragma warning(disable: 4355) // this used in base member initializer list +#pragma warning(disable: 4800) // forcing value to bool 'true' or 'false' + +// Rename symbols. +#define vsnprintf _vsnprintf +#define snprintf _snprintf + +#endif // _MSC_VER diff --git a/lib/AsmJit/ApiEnd.h b/lib/AsmJit/ApiEnd.h new file mode 100644 index 0000000..3cc5850 --- /dev/null +++ b/lib/AsmJit/ApiEnd.h @@ -0,0 +1,35 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#if defined(_MSC_VER) + +// Pop disabled warnings by ApiBegin.h +#pragma warning(pop) + +// Rename symbols back. +#undef vsnprintf +#undef snprintf + +#endif // _MSC_VER diff --git a/lib/AsmJit/AsmJit.h b/lib/AsmJit/AsmJit.h new file mode 100644 index 0000000..089c098 --- /dev/null +++ b/lib/AsmJit/AsmJit.h @@ -0,0 +1,370 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Guard] +#ifndef _ASMJIT_H +#define _ASMJIT_H + +//! @mainpage +//! +//! @brief AsmJit is complete x86/x64 JIT Assembler for C++ language +//! +//! 
It supports FPU, MMX, 3dNow, SSE, SSE2, SSE3 and SSE4 intrinsics, powerful +//! compiler that helps to write portable functions for 32-bit (x86) and 64-bit +//! (x64) architectures. AsmJit can be used to create functions at runtime that +//! can be called from existing (but also generated) C/C++ code. +//! +//! AsmJit is crossplatform library that supports various compilers and +//! operating systems. Currently only limitation is x86 (32-bit) or x64 (64-bit) +//! processor. Currently tested operating systems are Windows (32-bit and 64-bit), +//! Linux (32-bit and 64-bit) and MacOSX (32-bit). +//! +//! @section AsmJit_Main_Introduction Introduction +//! +//! AsmJit library contains two main classes for code generation with different +//! goals. First main code generation class is called @c AsmJit::Assembler and +//! contains low level API that can be used to generate JIT binary code. It +//! directly emits binary stream that represents encoded x86/x64 assembler +//! opcodes. Together with operands and labels it can be used to generate +//! complete code. For details look to @ref AsmJit_Core and @ref AsmJit_Compiler +//! sections. +//! +//! There is also class named @c AsmJit::Compiler that allows to develop +//! cross-platform assembler code without worring about function calling +//! conventions and registers allocation. It can be also used to write 32-bit +//! and 64-bit portable code. Compiler is recommended class to use for code +//! generation. +//! +//! Everything in AsmJit library is in @c AsmJit namespace. +//! +//! @section AsmJit_Main_CodeGeneration Code Generation +//! +//! - @ref AsmJit_Core "Assembler core" - Operands, intrinsics and low-level assembler. +//! - @ref AsmJit_Compiler "Compiler" - High level code generation. +//! - @ref AsmJit_CpuInfo "Cpu Information" - Get information about host processor. +//! - @ref AsmJit_Logging "Logging" - Logging and error handling. +//! 
- @ref AsmJit_MemoryManagement "Memory Management" - Virtual memory management. +//! +//! @section AsmJit_Main_Configuration Configuration, Definitions and Utilities +//! +//! - @ref AsmJit_Config "Configuration" - Macros used to configure AsmJit. +//! - @ref AsmJit_Util "Utilities" - Utilities and helper classes. +//! +//! @section AsmJit_Main_HomePage AsmJit Homepage +//! +//! - http://code.google.com/p/asmjit/ +//! +//! @section AsmJit_Main_X86X64Resources External X86/X64 Assembler Resources +//! - http://www.agner.org/optimize/ +//! - http://www.mark.masmcode.com/ (Assembler Tips) +//! - http://avisynth.org/mediawiki/Filter_SDK/Assembler_optimizing (Optimizing) +//! - http://www.ragestorm.net/distorm/ (Disassembling) +//! +//! @section AsmJit_Main_Terminology Terminology +//! +//! - <b>Non-volatile (preserved) register</b> - Register that can't be changed +//! by callee (callee must save and restore it if it want to use it inside). +//! +//! - <b>Volatile (non-preserved) register</b> - The opossite. Register that can +//! be freely used by callee. The caller must free all registers before calling +//! other function. + + +//! @defgroup AsmJit_Core Assembler core (operands, intrinsics and low-level assembler). +//! +//! Contains classes related to @c AsmJit::Assembler that're directly used +//! to generate machine code stream. It's one of oldest and fastest method +//! to generate machine code using AsmJit library. +//! +//! - See @c AsmJit::Assembler class for low level code generation +//! documentation. +//! - See @c AsmJit::Operand for AsmJit operand's overview. +//! +//! @section AsmJit_Core_Registers Registers +//! +//! There are static objects that represents X86 and X64 registers. They can +//! be used directly (like @c eax, @c mm, @c xmm, ...) or created through +//! these functions: +//! +//! - @c AsmJit::mk_gpb() - make general purpose byte register +//! - @c AsmJit::mk_gpw() - make general purpose word register +//! 
- @c AsmJit::mk_gpd() - make general purpose dword register +//! - @c AsmJit::mk_gpq() - make general purpose qword register +//! - @c AsmJit::mk_mm() - make mmx register +//! - @c AsmJit::mk_xmm() - make sse register +//! - @c AsmJit::st() - make x87 register +//! +//! @section AsmJit_Core_Addressing Addressing +//! +//! X86 and x64 architectures contains several addressing modes and most ones +//! are possible with AsmJit library. Memory represents are represented by +//! @c AsmJit::Mem class. These functions are used to make operands that +//! represents memory addresses: +//! +//! - @c AsmJit::ptr() +//! - @c AsmJit::byte_ptr() +//! - @c AsmJit::word_ptr() +//! - @c AsmJit::dword_ptr() +//! - @c AsmJit::qword_ptr() +//! - @c AsmJit::tword_ptr() +//! - @c AsmJit::dqword_ptr() +//! - @c AsmJit::mmword_ptr() +//! - @c AsmJit::xmmword_ptr() +//! - @c AsmJit::sysint_ptr() +//! +//! Most useful function to make pointer should be @c AsmJit::ptr(). It creates +//! pointer to the target with unspecified size. Unspecified size works in all +//! intrinsics where are used registers (this means that size is specified by +//! register operand or by instruction itself). For example @c AsmJit::ptr() +//! can't be used with @c AsmJit::Assembler::inc() instruction. In this case +//! size must be specified and it's also reason to make difference between +//! pointer sizes. +//! +//! Supported are simple address forms (register + displacement) and complex +//! address forms (register + (register << shift) + displacement). +//! +//! @section AsmJit_Core_Immediates Immediates +//! +//! Immediate values are constants thats passed directly after instruction +//! opcode. To create such value use @c AsmJit::imm() or @c AsmJit::uimm() +//! methods to create signed or unsigned immediate value. +//! +//! @sa @c AsmJit::Compiler. + + +//! @defgroup AsmJit_Compiler Compiler (high-level code generation). +//! +//! Contains classes related to @c AsmJit::Compiler that can be used +//! 
to generate code using high-level constructs. +//! +//! - See @c AsmJit::Compiler class for high level code generation +//! documentation - calling conventions, function declaration +//! and variables management. + +//! @defgroup AsmJit_Config Configuration. +//! +//! Contains macros that can be redefined to fit into any project. + + +//! @defgroup AsmJit_CpuInfo CPU information. +//! +//! X86 or x64 cpuid instruction allows to get information about processor +//! vendor and it's features. It's always used to detect features like MMX, +//! SSE and other newer ones. +//! +//! AsmJit library supports low level cpuid call implemented internally as +//! C++ function using inline assembler or intrinsics and also higher level +//! CPU features detection. The low level function (also used by higher level +//! one) is @c AsmJit::cpuid(). +//! +//! AsmJit library also contains higher level function @c AsmJit::getCpuInfo() +//! that returns features detected by the library. The detection process is +//! done only once and it's cached for all next calls. @c AsmJit::CpuInfo +//! structure not contains only information through @c AsmJit::cpuid(), but +//! there is also small multiplatform code to detect number of processors +//! (or cores) throught operating system API. +//! +//! It's recommended to use @c AsmJit::cpuInfo to detect and check for +//! host processor features. +//! +//! Example how to use AsmJit::cpuid(): +//! +//! @code +//! // All functions and structures are in AsmJit namesapce. +//! using namespace AsmJit; +//! +//! // Here will be retrieved result of cpuid call. +//! CpuId out; +//! +//! // Use cpuid function to do the job. +//! cpuid(0 /* eax */, &out /* eax, ebx, ecx, edx */); +//! +//! // Id eax argument to cpuid is 0, ebx, ecx and edx registers +//! // are filled with cpu vendor. +//! char vendor[13]; +//! memcpy(i->vendor, &out.ebx, 4); +//! memcpy(i->vendor + 4, &out.edx, 4); +//! memcpy(i->vendor + 8, &out.ecx, 4); +//! vendor[12] = '\0'; +//! +//! 
// Print vendor +//! puts(vendor); +//! @endcode +//! +//! If you want to use AsmJit::cpuid() function instead of higher level +//! @c AsmJit::getCpuInfo(), please read processor manuals provided by Intel, +//! AMD or other manufacturers for cpuid instruction details. +//! +//! Example of using @c AsmJit::getCpuInfo(): +//! +//! @code +//! // All functions and structures are in AsmJit namesapce. +//! using namespace AsmJit; +//! +//! // Call to cpuInfo return CpuInfo structure that shouldn't be modified. +//! // Make it const by default. +//! const CpuInfo *i = getCpuInfo(); +//! +//! // Now you are able to get specific features. +//! +//! // Processor has SSE2 +//! if (i->features & CPU_FEATURE_SSE2) +//! { +//! // your code... +//! } +//! // Processor has MMX +//! else if (i->features & CPU_FEATURE__MMX) +//! { +//! // your code... +//! } +//! // Processor is old, no SSE2 or MMX support. +//! else +//! { +//! // your code... +//! } +//! @endcode +//! +//! Better example is in AsmJit/Test/testcpu.cpp file. +//! +//! @sa AsmJit::cpuid, @c AsmJit::cpuInfo. + + +//! @defgroup AsmJit_Logging Logging and error handling. +//! +//! Contains classes related to loging assembler output. Currently logging +//! is implemented in @c AsmJit::Logger class.You can override +//! @c AsmJit::Logger::log() to log messages into your stream. There is also +//! @c FILE based logger implemented in @c AsmJit::FileLogger class. +//! +//! To log your assembler output to FILE stream use this code: +//! +//! @code +//! // Create assembler +//! Assembler a; +//! +//! // Create and set file based logger +//! FileLogger logger(stderr); +//! a.setLogger(&logger); +//! @endcode +//! +//! You can see that logging goes through @c AsmJit::Assembler. If you are +//! using @c AsmJit::Compiler and you want to log messages in correct assembler +//! order, you should look at @c AsmJit::Compiler::comment() method. It allows +//! you to insert text message into @c AsmJit::Emittable list and +//! 
@c AsmJit::Compiler will send your message to @c AsmJit::Assembler in +//! correct order. +//! +//! @sa @c AsmJit::Logger, @c AsmJit::FileLogger. + + +//! @defgroup AsmJit_MemoryManagement Virtual memory management. +//! +//! Using @c AsmJit::Assembler or @c AsmJit::Compiler to generate machine +//! code is not final step. Each generated code needs to run in memory +//! that is not protected against code execution. To alloc this code it's +//! needed to use operating system functions provided to enable execution +//! code in specified memory block or to allocate memory that is not +//! protected. The solution is always to use @c See AsmJit::Assembler::make() +//! and @c AsmJit::Compiler::make() functions that can allocate memory and +//! relocate code for you. But AsmJit also contains classes for manual memory +//! management thats internally used by AsmJit but can be used by programmers +//! too. +//! +//! Memory management contains low level and high level classes related to +//! allocating and freeing virtual memory. Low level class is +//! @c AsmJit::VirtualMemory that can allocate and free full pages of +//! virtual memory provided by operating system. Higher level class is +//! @c AsmJit::MemoryManager that is able to manage complete allocation and +//! free mechanism. It internally uses larger chunks of memory to make +//! allocation fast and effective. +//! +//! Using @c AsmJit::VirtualMemory::alloc() is crossplatform way how to +//! allocate this kind of memory without worrying about operating system +//! and it's API. Each memory block that is no longer needed should be +//! freed by @c AsmJit::VirtualMemory::free() method. If you want better +//! comfort and malloc()/free() interface, look at the +//! @c AsmJit::MemoryManager class. +//! +//! @sa @c AsmJit::VirtualMemory, @ AsmJit::MemoryManager. + + +//! @defgroup AsmJit_Util Utilities and helper classes. +//! +//! Contains some helper classes that's used by AsmJit library. + + + +//! 
@addtogroup AsmJit_Config +//! @{ + +//! @def ASMJIT_WINDOWS +//! @brief Macro that is declared if AsmJit is compiled for Windows. + +//! @def ASMJIT_POSIX +//! @brief Macro that is declared if AsmJit is compiled for unix like +//! operating system. + +//! @def ASMJIT_API +//! @brief Attribute that's added to classes that can be exported if AsmJit +//! is compiled as a dll library. + +//! @def ASMJIT_MALLOC +//! @brief Function to call to allocate dynamic memory. + +//! @def ASMJIT_REALLOC +//! @brief Function to call to reallocate dynamic memory. + +//! @def ASMJIT_FREE +//! @brief Function to call to free dynamic memory. + +//! @def ASMJIT_ASSERT +//! @brief Assertion macro. Default implementation calls +//! @c AsmJit::assertionFailure() function. + +//! @} + + +//! @namespace AsmJit +//! @brief Main AsmJit library namespace. +//! +//! There are not other namespaces used in AsmJit library. + + +// [Includes] +#include "Build.h" +#include "Assembler.h" +#include "CodeGenerator.h" +#include "Compiler.h" +#include "CpuInfo.h" +#include "Defs.h" +#include "Logger.h" +#include "MemoryManager.h" +#include "Operand.h" +#include "Platform.h" +#include "Util.h" + + +// [Guard] +#endif // _ASMJIT_H diff --git a/lib/AsmJit/Assembler.cpp b/lib/AsmJit/Assembler.cpp new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/lib/AsmJit/Assembler.cpp diff --git a/lib/AsmJit/Assembler.h b/lib/AsmJit/Assembler.h new file mode 100644 index 0000000..daf6ec3 --- /dev/null +++ b/lib/AsmJit/Assembler.h @@ -0,0 +1,55 @@ +// AsmJit - Complete JIT Assembler for C++ Language. 
+ +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +// [Guard] +#ifndef _ASMJIT_ASSEMBLER_H +#define _ASMJIT_ASSEMBLER_H + +// [Dependencies] +#include "Build.h" + +// ============================================================================ +// [AsmJit::Forward Declarations] +// ============================================================================ + +namespace AsmJit { + +struct Logger; +struct MemoryManager; +struct EInstruction; + +} // AsmJit namespace + +// ============================================================================ +// [Platform Specific] +// ============================================================================ + +// [X86 / X64] +#if defined(ASMJIT_X86) || defined(ASMJIT_X64) +#include "AssemblerX86X64.h" +#endif // ASMJIT_X86 || ASMJIT_X64 + +// [Guard] +#endif // _ASMJIT_ASSEMBLER_H diff --git a/lib/AsmJit/AssemblerX86X64.cpp b/lib/AsmJit/AssemblerX86X64.cpp new file mode 100644 index 0000000..904c1c9 --- /dev/null +++ b/lib/AsmJit/AssemblerX86X64.cpp @@ -0,0 +1,2979 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// We are using sprintf() here. +#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS) +#define _CRT_SECURE_NO_WARNINGS +#endif // _MSC_VER + +// [Dependencies] +#include "Assembler.h" +#include "CodeGenerator.h" +#include "CpuInfo.h" +#include "Defs.h" +#include "Logger.h" +#include "MemoryManager.h" +#include "Platform.h" +#include "Util_p.h" + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +// A little bit C++. +#include <new> + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +#if defined(ASMJIT_X64) + +// ============================================================================ +// [AsmJit::TrampolineWriter] +// ============================================================================ + +//! @brief Class used to determine size of trampoline and as trampoline writer. +struct ASMJIT_HIDDEN TrampolineWriter +{ + // Size of trampoline + enum { + TRAMPOLINE_JMP = 6, + TRAMPOLINE_ADDR = sizeof(sysint_t), + + TRAMPOLINE_SIZE = TRAMPOLINE_JMP + TRAMPOLINE_ADDR + }; + + // Write trampoline into code at address @a code that will jump to @a target. + static void writeTrampoline(uint8_t* code, void* target) + { + // Jmp. + code[0] = 0xFF; + // ModM (RIP addressing). + code[1] = 0x25; + // Offset (zero). + ((uint32_t*)(code + 2))[0] = 0; + // Absolute address. 
+ ((sysuint_t*)(code + TRAMPOLINE_JMP))[0] = (sysuint_t)target; + } +}; + +#endif // ASMJIT_X64 + +// ============================================================================ +// [AsmJit::AssemblerCore - Construction / Destruction] +// ============================================================================ + +AssemblerCore::AssemblerCore(CodeGenerator* codeGenerator) ASMJIT_NOTHROW : + _codeGenerator(codeGenerator != NULL ? codeGenerator : CodeGenerator::getGlobal()), + _zone(16384 - sizeof(Zone::Chunk) - 32), + _logger(NULL), + _error(0), + _properties((1 << PROPERTY_OPTIMIZE_ALIGN)), + _emitOptions(0), + _buffer(32), // Max instruction length is 15, but we can align up to 32 bytes. + _trampolineSize(0), + _unusedLinks(NULL), + _comment(NULL) +{ +} + +AssemblerCore::~AssemblerCore() ASMJIT_NOTHROW +{ +} + +// ============================================================================ +// [AsmJit::AssemblerCore - Logging] +// ============================================================================ + +void AssemblerCore::setLogger(Logger* logger) ASMJIT_NOTHROW +{ + _logger = logger; +} + +// ============================================================================ +// [AsmJit::AssemblerCore - Error Handling] +// ============================================================================ + +void AssemblerCore::setError(uint32_t error) ASMJIT_NOTHROW +{ + _error = error; + if (_error == ERROR_NONE) return; + + if (_logger) + { + _logger->logFormat("*** ASSEMBLER ERROR: %s (%u).\n", + getErrorCodeAsString(error), + (unsigned int)error); + } +} + +// ============================================================================ +// [AsmJit::AssemblerCore - Properties] +// ============================================================================ + +uint32_t AssemblerCore::getProperty(uint32_t propertyId) +{ + return (_properties & (1 << propertyId)) != 0; +} + +void AssemblerCore::setProperty(uint32_t propertyId, uint32_t value) +{ + if (value) + 
_properties |= (1 << propertyId); + else + _properties &= ~(1 << propertyId); +} + +// ============================================================================ +// [AsmJit::AssemblerCore - Buffer] +// ============================================================================ + +void AssemblerCore::clear() ASMJIT_NOTHROW +{ + _buffer.clear(); + _relocData.clear(); + _zone.clear(); + + if (_error) setError(ERROR_NONE); +} + +void AssemblerCore::free() ASMJIT_NOTHROW +{ + _zone.freeAll(); + _buffer.free(); + _relocData.free(); + + if (_error) setError(ERROR_NONE); +} + +uint8_t* AssemblerCore::takeCode() ASMJIT_NOTHROW +{ + uint8_t* code = _buffer.take(); + _relocData.clear(); + _zone.clear(); + + if (_error) setError(ERROR_NONE); + return code; +} + +// ============================================================================ +// [AsmJit::AssemblerCore - Stream Setters / Getters] +// ============================================================================ + +void AssemblerCore::setVarAt(sysint_t pos, sysint_t i, uint8_t isUnsigned, uint32_t size) ASMJIT_NOTHROW +{ + if (size == 1 && !isUnsigned) setByteAt (pos, (int8_t )i); + else if (size == 1 && isUnsigned) setByteAt (pos, (uint8_t )i); + else if (size == 2 && !isUnsigned) setWordAt (pos, (int16_t )i); + else if (size == 2 && isUnsigned) setWordAt (pos, (uint16_t)i); + else if (size == 4 && !isUnsigned) setDWordAt(pos, (int32_t )i); + else if (size == 4 && isUnsigned) setDWordAt(pos, (uint32_t)i); +#if defined(ASMJIT_X64) + else if (size == 8 && !isUnsigned) setQWordAt(pos, (int64_t )i); + else if (size == 8 && isUnsigned) setQWordAt(pos, (uint64_t)i); +#endif // ASMJIT_X64 + else + ASMJIT_ASSERT(0); +} + +// ============================================================================ +// [AsmJit::AssemblerCore - Assembler Emitters] +// ============================================================================ + +bool AssemblerCore::canEmit() ASMJIT_NOTHROW +{ + // If there is an error, we can't emit 
another instruction until last error + // is cleared by calling @c setError(ERROR_NONE). If something caused an error + // while generating code it's probably fatal in all cases. You can't use + // generated code, because you are not sure about its status. + if (_error) return false; + + // The ensureSpace() method returns true on success and false on failure. We + // are catching return value and setting error code here. + if (ensureSpace()) return true; + + // If we are here, there is memory allocation error. Note that this is HEAP + // allocation error, virtual allocation error can be caused only by + // AsmJit::VirtualMemory class! + setError(ERROR_NO_HEAP_MEMORY); + return false; +} + +void AssemblerCore::_emitSegmentPrefix(const Operand& rm) ASMJIT_NOTHROW +{ + static const uint8_t prefixes[] = { 0x00, 0x2E, 0x36, 0x3E, 0x26, 0x64, 0x65 }; + + if (rm.isMem()) + { + sysuint_t segmentPrefix = reinterpret_cast<const Mem&>(rm).getSegmentPrefix(); + if (segmentPrefix) _emitByte(prefixes[segmentPrefix]); + } +} + +void AssemblerCore::_emitModM( + uint8_t opReg, const Mem& mem, sysint_t immSize) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(mem.getType() == OPERAND_MEM); + + uint8_t baseReg = mem.getBase() & 0x7; + uint8_t indexReg = mem.getIndex() & 0x7; + sysint_t disp = mem.getDisplacement(); + uint32_t shift = mem.getShift(); + + if (mem.getMemType() == OPERAND_MEM_NATIVE) + { + // [base + displacemnt] + if (!mem.hasIndex()) + { + // ESP/RSP/R12 == 4 + if (baseReg == 4) + { + uint8_t mod = 0; + + if (disp) + { + mod = Util::isInt8(disp) ? 
1 : 2; + } + + _emitMod(mod, opReg, 4); + _emitSib(0, 4, 4); + + if (disp) + { + if (Util::isInt8(disp)) + _emitByte((int8_t)disp); + else + _emitInt32((int32_t)disp); + } + } + // EBP/RBP/R13 == 5 + else if (baseReg != 5 && disp == 0) + { + _emitMod(0, opReg, baseReg); + } + else if (Util::isInt8(disp)) + { + _emitMod(1, opReg, baseReg); + _emitByte((int8_t)disp); + } + else + { + _emitMod(2, opReg, baseReg); + _emitInt32((int32_t)disp); + } + } + + // [base + index * scale + displacemnt] + else + { + // ASMJIT_ASSERT(indexReg != RID_ESP); + + // EBP/RBP/R13 == 5 + if (baseReg != 5 && disp == 0) + { + _emitMod(0, opReg, 4); + _emitSib(shift, indexReg, baseReg); + } + else if (Util::isInt8(disp)) + { + _emitMod(1, opReg, 4); + _emitSib(shift, indexReg, baseReg); + _emitByte((int8_t)disp); + } + else + { + _emitMod(2, opReg, 4); + _emitSib(shift, indexReg, baseReg); + _emitInt32((int32_t)disp); + } + } + } + + // Address | 32-bit mode | 64-bit mode + // ------------------------------+-------------+--------------- + // [displacement] | ABSOLUTE | RELATIVE (RIP) + // [index * scale + displacemnt] | ABSOLUTE | ABSOLUTE (ZERO EXTENDED) + else + { + // - In 32-bit mode the absolute addressing model is used. + // - In 64-bit mode the relative addressing model is used together with + // the absolute addressing. Main problem is that if instruction + // contains SIB then relative addressing (RIP) is not possible. + +#if defined(ASMJIT_X86) + + if (mem.hasIndex()) + { + // ASMJIT_ASSERT(mem.getMemIndex() != 4); // ESP/RSP == 4 + _emitMod(0, opReg, 4); + _emitSib(shift, indexReg, 5); + } + else + { + _emitMod(0, opReg, 5); + } + + // X86 uses absolute addressing model, all relative addresses will be + // relocated to absolute ones. 
+ if (mem.getMemType() == OPERAND_MEM_LABEL) + { + LabelData& l_data = _labelData[mem._mem.base & OPERAND_ID_VALUE_MASK]; + RelocData r_data; + uint32_t relocId = _relocData.getLength(); + + // Relative addressing will be relocated to absolute address. + r_data.type = RelocData::RELATIVE_TO_ABSOLUTE; + r_data.size = 4; + r_data.offset = getOffset(); + r_data.destination = disp; + + if (l_data.offset != -1) + { + // Bound label. + r_data.destination += l_data.offset; + + // Add a dummy DWORD. + _emitInt32(0); + } + else + { + // Non-bound label. + _emitDisplacement(l_data, -4 - immSize, 4)->relocId = relocId; + } + + _relocData.append(r_data); + } + else + { + // Absolute address + _emitInt32( (int32_t)((uint8_t*)mem._mem.target + disp) ); + } + +#else + + // X64 uses relative addressing model + if (mem.getMemType() == OPERAND_MEM_LABEL) + { + LabelData& l_data = _labelData[mem._mem.base & OPERAND_ID_VALUE_MASK]; + + if (mem.hasIndex()) + { + // Indexing is not possible. + setError(ERROR_ILLEGAL_ADDRESING); + return; + } + + // Relative address (RIP +/- displacement). + _emitMod(0, opReg, 5); + + disp -= (4 + immSize); + + if (l_data.offset != -1) + { + // Bound label. + disp += getOffset() - l_data.offset; + + // Add a dummy DWORD. + _emitInt32((int32_t)disp); + } + else + { + // Non-bound label. + _emitDisplacement(l_data, disp, 4); + } + } + else + { + // Absolute address (truncated to 32-bits), this kind of address requires + // SIB byte (4). + _emitMod(0, opReg, 4); + + if (mem.hasIndex()) + { + // ASMJIT_ASSERT(mem.getMemIndex() != 4); // ESP/RSP == 4 + _emitSib(shift, indexReg, 5); + } + else + { + _emitSib(0, 4, 5); + } + + // Truncate to 32-bits. 
+ sysuint_t target = (sysuint_t)((uint8_t*)mem._mem.target + disp); + + if (target > (sysuint_t)0xFFFFFFFF) + { + if (_logger) + { + _logger->logString("*** ASSEMBER WARNING - Absolute address truncated to 32-bits.\n"); + } + target &= 0xFFFFFFFF; + } + + _emitInt32( (int32_t)((uint32_t)target) ); + } + +#endif // ASMJIT_X64 + + } +} + +void AssemblerCore::_emitModRM( + uint8_t opReg, const Operand& op, sysint_t immSize) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(op.getType() == OPERAND_REG || op.getType() == OPERAND_MEM); + + if (op.getType() == OPERAND_REG) + _emitModR(opReg, reinterpret_cast<const BaseReg&>(op).getRegCode()); + else + _emitModM(opReg, reinterpret_cast<const Mem&>(op), immSize); +} + +void AssemblerCore::_emitX86Inl( + uint32_t opCode, uint8_t i16bit, uint8_t rexw, uint8_t reg, bool forceRexPrefix) ASMJIT_NOTHROW +{ + // 16-bit prefix. + if (i16bit) _emitByte(0x66); + + // Instruction prefix. + if (opCode & 0xFF000000) _emitByte((uint8_t)((opCode & 0xFF000000) >> 24)); + + // REX prefix. +#if defined(ASMJIT_X64) + _emitRexR(rexw, 0, reg, forceRexPrefix); +#endif // ASMJIT_X64 + + // Instruction opcodes. + if (opCode & 0x00FF0000) _emitByte((uint8_t)((opCode & 0x00FF0000) >> 16)); + if (opCode & 0x0000FF00) _emitByte((uint8_t)((opCode & 0x0000FF00) >> 8)); + + _emitByte((uint8_t)(opCode & 0x000000FF) + (reg & 0x7)); +} + +void AssemblerCore::_emitX86RM( + uint32_t opCode, uint8_t i16bit, uint8_t rexw, uint8_t o, + const Operand& op, sysint_t immSize, bool forceRexPrefix) ASMJIT_NOTHROW +{ + // 16-bit prefix. + if (i16bit) _emitByte(0x66); + + // Segment prefix. + _emitSegmentPrefix(op); + + // Instruction prefix. + if (opCode & 0xFF000000) _emitByte((uint8_t)((opCode & 0xFF000000) >> 24)); + + // REX prefix. +#if defined(ASMJIT_X64) + _emitRexRM(rexw, o, op, forceRexPrefix); +#endif // ASMJIT_X64 + + // Instruction opcodes. 
+ if (opCode & 0x00FF0000) _emitByte((uint8_t)((opCode & 0x00FF0000) >> 16)); + if (opCode & 0x0000FF00) _emitByte((uint8_t)((opCode & 0x0000FF00) >> 8)); + _emitByte((uint8_t)(opCode & 0x000000FF)); + + // Mod R/M. + _emitModRM(o, op, immSize); +} + +void AssemblerCore::_emitFpu(uint32_t opCode) ASMJIT_NOTHROW +{ + _emitOpCode(opCode); +} + +void AssemblerCore::_emitFpuSTI(uint32_t opCode, uint32_t sti) ASMJIT_NOTHROW +{ + // Illegal stack offset. + ASMJIT_ASSERT(0 <= sti && sti < 8); + _emitOpCode(opCode + sti); +} + +void AssemblerCore::_emitFpuMEM(uint32_t opCode, uint8_t opReg, const Mem& mem) ASMJIT_NOTHROW +{ + // Segment prefix. + _emitSegmentPrefix(mem); + + // Instruction prefix. + if (opCode & 0xFF000000) _emitByte((uint8_t)((opCode & 0xFF000000) >> 24)); + + // REX prefix. +#if defined(ASMJIT_X64) + _emitRexRM(0, opReg, mem, false); +#endif // ASMJIT_X64 + + // Instruction opcodes. + if (opCode & 0x00FF0000) _emitByte((uint8_t)((opCode & 0x00FF0000) >> 16)); + if (opCode & 0x0000FF00) _emitByte((uint8_t)((opCode & 0x0000FF00) >> 8)); + + _emitByte((uint8_t)((opCode & 0x000000FF))); + _emitModM(opReg, mem, 0); +} + +void AssemblerCore::_emitMmu(uint32_t opCode, uint8_t rexw, uint8_t opReg, + const Operand& src, sysint_t immSize) ASMJIT_NOTHROW +{ + // Segment prefix. + _emitSegmentPrefix(src); + + // Instruction prefix. + if (opCode & 0xFF000000) _emitByte((uint8_t)((opCode & 0xFF000000) >> 24)); + + // REX prefix. +#if defined(ASMJIT_X64) + _emitRexRM(rexw, opReg, src, false); +#endif // ASMJIT_X64 + + // Instruction opcodes. + if (opCode & 0x00FF0000) _emitByte((uint8_t)((opCode & 0x00FF0000) >> 16)); + + // No checking, MMX/SSE instructions have always two opcodes or more. 
  // Remaining two opcode bytes (see comment above: MMX/SSE opcodes always
  // have at least two bytes), then the ModR/M stage for the source operand.
  _emitByte((uint8_t)((opCode & 0x0000FF00) >> 8));
  _emitByte((uint8_t)((opCode & 0x000000FF)));

  if (src.isReg())
    _emitModR(opReg, reinterpret_cast<const BaseReg&>(src).getRegCode());
  else
    _emitModM(opReg, reinterpret_cast<const Mem&>(src), immSize);
}

// Record a to-be-patched displacement for a not-yet-bound label: a new
// LabelLink is pushed onto the label's link chain, remembering the current
// buffer offset and the displacement to inline once the label is bound,
// and `size` (1 or 4) placeholder bytes are emitted.  Returns the link so
// callers can attach a relocation id to it.
AssemblerCore::LabelLink* AssemblerCore::_emitDisplacement(
  LabelData& l_data, sysint_t inlinedDisplacement, int size) ASMJIT_NOTHROW
{
  // Only unbound labels may be chained (bound ones are patched inline).
  ASMJIT_ASSERT(l_data.offset == -1);
  ASMJIT_ASSERT(size == 1 || size == 4);

  // Chain with label.
  LabelLink* link = _newLabelLink();
  link->prev = l_data.links;
  link->offset = getOffset();
  link->displacement = inlinedDisplacement;

  l_data.links = link;

  // Emit label size as dummy data.
  // The dummy bytes encode the placeholder size so it is recoverable later.
  if (size == 1)
    _emitByte(0x01);
  else // if (size == 4)
    _emitDWord(0x04040404);

  return link;
}

// Record a relocation for a JMP/CALL to an absolute `target` and emit a
// dummy rel32 field; relocCode() later rewrites it (on X64 via a
// trampoline when the target is out of rel32 range).
void AssemblerCore::_emitJmpOrCallReloc(uint32_t instruction, void* target) ASMJIT_NOTHROW
{
  RelocData rd;

  rd.type = RelocData::ABSOLUTE_TO_RELATIVE_TRAMPOLINE;

#if defined(ASMJIT_X64)
  // If we are compiling in 64-bit mode, we can use trampoline if relative jump
  // is not possible.
  _trampolineSize += TrampolineWriter::TRAMPOLINE_SIZE;
#endif // ARCHITECTURE_SPECIFIC

  rd.size = 4;
  rd.offset = getOffset();
  rd.address = target;

  _relocData.append(rd);

  // Emit dummy 32-bit integer (will be overwritten by relocCode()).
  _emitInt32(0);
}

// Logging helpers.
+static const char* operandSize[] = +{ + NULL, + "byte ptr ", + "word ptr ", + NULL, + "dword ptr ", + NULL, + NULL, + NULL, + "qword ptr ", + NULL, + "tword ptr ", + NULL, + NULL, + NULL, + NULL, + NULL, + "dqword ptr " +}; + +static const char segmentName[] = + "\0\0\0\0" + "cs:\0" + "ss:\0" + "ds:\0" + "es:\0" + "fs:\0" + "gs:\0"; + +ASMJIT_HIDDEN char* dumpInstructionName(char* buf, uint32_t code) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(code < _INST_COUNT); + return Util::mycpy(buf, instructionDescription[code].getName()); +} + +ASMJIT_HIDDEN char* dumpRegister(char* buf, uint32_t type, uint32_t index) ASMJIT_NOTHROW +{ + // NE == Not-Encodable. + const char reg8l[] = "al\0\0" "cl\0\0" "dl\0\0" "bl\0\0" "spl\0" "bpl\0" "sil\0" "dil\0" ; + const char reg8h[] = "ah\0\0" "ch\0\0" "dh\0\0" "bh\0\0" "NE\0\0" "NE\0\0" "NE\0\0" "NE\0\0"; + const char reg16[] = "ax\0\0" "cx\0\0" "dx\0\0" "bx\0\0" "sp\0\0" "bp\0\0" "si\0\0" "di\0\0"; + + switch (type) + { + case REG_TYPE_GPB_LO: + if (index < 8) + return buf + sprintf(buf, "%s", ®8l[index*4]); + else + return buf + sprintf(buf, "r%ub", (uint32_t)index); + case REG_TYPE_GPB_HI: + if (index < 4) + return buf + sprintf(buf, "%s", ®8h[index*4]); + else + return buf + sprintf(buf, "%s", "INVALID"); + case REG_TYPE_GPW: + if (index < 8) + return buf + sprintf(buf, "%s", ®16[index*4]); + else + return buf + sprintf(buf, "r%uw", (uint32_t)index); + case REG_TYPE_GPD: + if (index < 8) + return buf + sprintf(buf, "e%s", ®16[index*4]); + else + return buf + sprintf(buf, "r%ud", (uint32_t)index); + case REG_TYPE_GPQ: + if (index < 8) + return buf + sprintf(buf, "r%s", ®16[index*4]); + else + return buf + sprintf(buf, "r%u", (uint32_t)index); + case REG_TYPE_X87: + return buf + sprintf(buf, "st%u", (uint32_t)index); + case REG_TYPE_MM: + return buf + sprintf(buf, "mm%u", (uint32_t)index); + case REG_TYPE_XMM: + return buf + sprintf(buf, "xmm%u", (uint32_t)index); + default: + return buf; + } +} + +ASMJIT_HIDDEN char* dumpOperand(char* 
buf, const Operand* op) ASMJIT_NOTHROW
{
  if (op->isReg())
  {
    const BaseReg& reg = reinterpret_cast<const BaseReg&>(*op);
    return dumpRegister(buf, reg.getRegType(), reg.getRegIndex());
  }
  else if (op->isMem())
  {
    bool isAbsolute = false;
    const Mem& mem = reinterpret_cast<const Mem&>(*op);

    // Size prefix ("byte ptr ", "dword ptr ", ...) when the size is known.
    if (op->getSize() <= 16)
    {
      buf = Util::mycpy(buf, operandSize[op->getSize()]);
    }

    // Segment override ("fs:", ...), empty string when there is none.
    buf = Util::mycpy(buf, &segmentName[mem.getSegmentPrefix() * 4]);

    *buf++ = '[';

    switch (mem.getMemType())
    {
      case OPERAND_MEM_NATIVE:
      {
        // [base + index*scale + displacement]
        buf = dumpRegister(buf, REG_TYPE_GPN, mem.getBase());
        break;
      }
      case OPERAND_MEM_LABEL:
      {
        // [label + index*scale + displacement]
        buf += sprintf(buf, "L.%u", mem.getBase() & OPERAND_ID_VALUE_MASK);
        break;
      }
      case OPERAND_MEM_ABSOLUTE:
      {
        // [absolute] - target address printed in hexadecimal.
        isAbsolute = true;
        buf = Util::myutoa(buf, (sysuint_t)mem.getTarget(), 16);
        break;
      }
    }

    if (mem.hasIndex())
    {
      buf = Util::mycpy(buf, " + ");
      buf = dumpRegister(buf, REG_TYPE_GPN, mem.getIndex());

      if (mem.getShift())
      {
        // Shift 1..3 maps to a scale factor of 2, 4 or 8.
        buf = Util::mycpy(buf, " * ");
        *buf++ = "1248"[mem.getShift() & 3];
      }
    }

    // Displacement is printed as " + n" / " - n"; absolute addresses already
    // folded it into the printed target above.
    if (mem.getDisplacement() && !isAbsolute)
    {
      sysint_t d = mem.getDisplacement();
      *buf++ = ' ';
      *buf++ = (d < 0) ? '-' : '+';
      *buf++ = ' ';
      buf = Util::myutoa(buf, d < 0 ?
-d : d);
    }

    *buf++ = ']';
    return buf;
  }
  else if (op->isImm())
  {
    const Imm& i = reinterpret_cast<const Imm&>(*op);
    return Util::myitoa(buf, (sysint_t)i.getValue());
  }
  else if (op->isLabel())
  {
    return buf + sprintf(buf, "L.%u", op->getId() & OPERAND_ID_VALUE_MASK);
  }
  else
  {
    return Util::mycpy(buf, "None");
  }
}

// Append a full textual form of one instruction: optional prefixes, the
// mnemonic, and up to three comma-separated operands. Returns the new end
// of the buffer.
static char* dumpInstruction(char* buf,
  uint32_t code,
  uint32_t emitOptions,
  const Operand* o0,
  const Operand* o1,
  const Operand* o2) ASMJIT_NOTHROW
{
  if (emitOptions & EMIT_OPTION_REX_PREFIX ) buf = Util::mycpy(buf, "rex ", 4);
  if (emitOptions & EMIT_OPTION_LOCK_PREFIX) buf = Util::mycpy(buf, "lock ", 5);
  if (emitOptions & EMIT_OPTION_SHORT_JUMP ) buf = Util::mycpy(buf, "short ", 6);

  // Dump instruction.
  buf = dumpInstructionName(buf, code);

  // Dump operands.
  if (!o0->isNone()) { *buf++ = ' '; buf = dumpOperand(buf, o0); }
  if (!o1->isNone()) { *buf++ = ','; *buf++ = ' '; buf = dumpOperand(buf, o1); }
  if (!o2->isNone()) { *buf++ = ','; *buf++ = ' '; buf = dumpOperand(buf, o2); }

  return buf;
}

// Append the trailing part of a log line: the raw encoded bytes and/or a
// user comment, column-aligned and '\n'-terminated. `len` is the length of
// the text already written to the line (used for alignment).
static char* dumpComment(char* buf, sysuint_t len, const uint8_t* binaryData, sysuint_t binaryLen, const char* comment)
{
  sysuint_t currentLength = len;
  sysuint_t commentLength = comment ? strlen(comment) : 0;

  if (binaryLen || commentLength)
  {
    sysuint_t align = 32;
    char sep = ';';

    // Truncate if comment is too long (it shouldn't be, larger than 80 seems to
    // be an exploit).
    if (commentLength > 80) commentLength = 80;

    // Pass 0 appends the binary dump, pass 1 the comment; pass 0 is skipped
    // entirely when there are no binary bytes.
    for (sysuint_t i = (binaryLen == 0); i < 2; i++)
    {
      char* bufBegin = buf;

      // Append align.
      if (currentLength < align)
      {
        buf = Util::myfill(buf, ' ', align - currentLength);
      }

      // Append separator.
      if (sep)
      {
        *buf++ = sep;
        *buf++ = ' ';
      }

      // Append binary data or comment.
      if (i == 0)
      {
        buf = Util::myhex(buf, binaryData, binaryLen);
        if (commentLength == 0) break;
      }
      else
      {
        buf = Util::mycpy(buf, comment, commentLength);
      }

      // Widen the alignment column for the next pass so the comment lands
      // to the right of the binary dump.
      currentLength += (sysuint_t)(buf - bufBegin);
      align += 18;
      sep = '|';
    }
  }

  *buf++ = '\n';
  return buf;
}

// Used for NULL operands to translate them to OPERAND_NONE.
static const uint8_t _none[sizeof(Operand)] =
{
  0
};

// Substitute register operands used when an AH/BH/CH/DH operand has to be
// re-encoded: same register index + 4, expressed as a GPB_LO register
// (see the GPB.HI patching note in _emitInstruction below).
static const Operand::RegData _patchedHiRegs[4] =
{// op , size, { reserved0, reserved1 }, id , code
  { OPERAND_REG, 1 , { 0 , 0 }, INVALID_VALUE, REG_TYPE_GPB_LO | 4 },
  { OPERAND_REG, 1 , { 0 , 0 }, INVALID_VALUE, REG_TYPE_GPB_LO | 5 },
  { OPERAND_REG, 1 , { 0 , 0 }, INVALID_VALUE, REG_TYPE_GPB_LO | 6 },
  { OPERAND_REG, 1 , { 0 , 0 }, INVALID_VALUE, REG_TYPE_GPB_LO | 7 }
};

// Convenience overloads forwarding to the full 3-operand emitter with the
// missing operands passed as NULL (translated to OPERAND_NONE below).
void AssemblerCore::_emitInstruction(uint32_t code) ASMJIT_NOTHROW
{
  _emitInstruction(code, NULL, NULL, NULL);
}

void AssemblerCore::_emitInstruction(uint32_t code, const Operand* o0) ASMJIT_NOTHROW
{
  _emitInstruction(code, o0, NULL, NULL);
}

void AssemblerCore::_emitInstruction(uint32_t code, const Operand* o0, const Operand* o1) ASMJIT_NOTHROW
{
  _emitInstruction(code, o0, o1, NULL);
}

// Main instruction emitter: encodes `code` with up to three operands.
// (Definition continues beyond the visible region.)
void AssemblerCore::_emitInstruction(uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2) ASMJIT_NOTHROW
{
  const Operand* _loggerOperands[3];

  // Accumulates GPB_LO/GPB_HI type bits of all register operands so the
  // 8-bit register fix-ups below run only when needed.
  uint32_t bLoHiUsed = 0;
#if defined(ASMJIT_X86)
  uint32_t forceRexPrefix = false;
#else
  uint32_t forceRexPrefix = _emitOptions & EMIT_OPTION_REX_PREFIX;
#endif

#if defined(ASMJIT_DEBUG)
  bool assertIllegal = false;
#endif // ASMJIT_DEBUG

  const Imm* immOperand = NULL;
  uint32_t immSize;

// The encoder cases below jump to the shared epilogue either directly or
// via the immediate-emitting path.
#define _FINISHED() \
  goto end

#define _FINISHED_IMMEDIATE(_Operand_, _Size_) \
  do { \
    immOperand = reinterpret_cast<const Imm*>(_Operand_); \
    immSize = (_Size_); \
    goto emitImmediate; \
  } while (0)

  // Convert operands to OPERAND_NONE if needed.
+ if (o0 == NULL) { o0 = reinterpret_cast<const Operand*>(_none); } else if (o0->isReg()) { bLoHiUsed |= o0->_reg.code & (REG_TYPE_GPB_LO | REG_TYPE_GPB_HI); } + if (o1 == NULL) { o1 = reinterpret_cast<const Operand*>(_none); } else if (o1->isReg()) { bLoHiUsed |= o1->_reg.code & (REG_TYPE_GPB_LO | REG_TYPE_GPB_HI); } + if (o2 == NULL) { o2 = reinterpret_cast<const Operand*>(_none); } else if (o2->isReg()) { bLoHiUsed |= o2->_reg.code & (REG_TYPE_GPB_LO | REG_TYPE_GPB_HI); } + + sysuint_t beginOffset = getOffset(); + const InstructionDescription* id = &instructionDescription[code]; + + if (code >= _INST_COUNT) + { + setError(ERROR_UNKNOWN_INSTRUCTION); + goto cleanup; + } + + // Check if register operand is BPL, SPL, SIL, DIL and do action that depends + // to current mode: + // - 64-bit: - Force REX prefix. + // + // Check if register operand is AH, BH, CH or DH and do action that depends + // to current mode: + // - 32-bit: - Patch operand index (index += 4), because we are using + // different index what is used in opcode. + // - 64-bit: - Check whether there is REX prefix and raise error if it is. + // - Do the same as in 32-bit mode - patch register index. + // + // NOTE: This is a hit hacky, but I added this to older code-base and I have + // no energy to rewrite it. Maybe in future all of this can be cleaned up! + if (bLoHiUsed | forceRexPrefix) + { + _loggerOperands[0] = o0; + _loggerOperands[1] = o1; + _loggerOperands[2] = o2; + +#if defined(ASMJIT_X64) + // Check if there is register that makes this instruction un-encodable. 
+ + forceRexPrefix |= (uint32_t)o0->isExtendedRegisterUsed(); + forceRexPrefix |= (uint32_t)o1->isExtendedRegisterUsed(); + forceRexPrefix |= (uint32_t)o2->isExtendedRegisterUsed(); + + if (o0->isRegType(REG_TYPE_GPB_LO) && (o0->_reg.code & REG_INDEX_MASK) >= 4) forceRexPrefix = true; + else if (o1->isRegType(REG_TYPE_GPB_LO) && (o1->_reg.code & REG_INDEX_MASK) >= 4) forceRexPrefix = true; + else if (o2->isRegType(REG_TYPE_GPB_LO) && (o2->_reg.code & REG_INDEX_MASK) >= 4) forceRexPrefix = true; + + if ((bLoHiUsed & REG_TYPE_GPB_HI) != 0 && forceRexPrefix) + { + goto illegalInstruction; + } +#endif // ASMJIT_X64 + + // Patch GPB.HI operand index. + if ((bLoHiUsed & REG_TYPE_GPB_HI) != 0) + { + if (o0->isRegType(REG_TYPE_GPB_HI)) o0 = reinterpret_cast<const Operand*>(&_patchedHiRegs[o0->_reg.code & REG_INDEX_MASK]); + if (o1->isRegType(REG_TYPE_GPB_HI)) o1 = reinterpret_cast<const Operand*>(&_patchedHiRegs[o1->_reg.code & REG_INDEX_MASK]); + if (o2->isRegType(REG_TYPE_GPB_HI)) o2 = reinterpret_cast<const Operand*>(&_patchedHiRegs[o2->_reg.code & REG_INDEX_MASK]); + } + } + + // Check for buffer space (and grow if needed). 
+ if (!canEmit()) goto cleanup; + + if (_emitOptions & EMIT_OPTION_LOCK_PREFIX) + { + if (!id->isLockable()) goto illegalInstruction; + _emitByte(0xF0); + } + + switch (id->group) + { + case InstructionDescription::G_EMIT: + { + _emitOpCode(id->opCode[0]); + _FINISHED(); + } + + case InstructionDescription::G_ALU: + { + uint32_t opCode = id->opCode[0]; + uint8_t opReg = (uint8_t)id->opCodeR; + + // Mem <- Reg + if (o0->isMem() && o1->isReg()) + { + _emitX86RM(opCode + (o1->getSize() != 1), + o1->getSize() == 2, + o1->getSize() == 8, + reinterpret_cast<const GPReg&>(*o1).getRegCode(), + reinterpret_cast<const Operand&>(*o0), + 0, forceRexPrefix); + _FINISHED(); + } + + // Reg <- Reg|Mem + if (o0->isReg() && o1->isRegMem()) + { + _emitX86RM(opCode + 2 + (o0->getSize() != 1), + o0->getSize() == 2, + o0->getSize() == 8, + reinterpret_cast<const GPReg&>(*o0).getRegCode(), + reinterpret_cast<const Operand&>(*o1), + 0, forceRexPrefix); + _FINISHED(); + } + + // AL, AX, EAX, RAX register shortcuts + if (o0->isRegIndex(0) && o1->isImm()) + { + if (o0->getSize() == 2) + _emitByte(0x66); // 16-bit. + else if (o0->getSize() == 8) + _emitByte(0x48); // REX.W. + + _emitByte((opReg << 3) | (0x04 + (o0->getSize() != 1))); + _FINISHED_IMMEDIATE(o1, o0->getSize() <= 4 ? o0->getSize() : 4); + } + + if (o0->isRegMem() && o1->isImm()) + { + const Imm& imm = reinterpret_cast<const Imm&>(*o1); + immSize = Util::isInt8(imm.getValue()) ? 1 : (o0->getSize() <= 4 ? o0->getSize() : 4); + + _emitX86RM(id->opCode[1] + (o0->getSize() != 1 ? (immSize != 1 ? 
1 : 3) : 0), + o0->getSize() == 2, + o0->getSize() == 8, + opReg, reinterpret_cast<const Operand&>(*o0), + immSize, forceRexPrefix); + _FINISHED_IMMEDIATE(&imm, immSize); + } + + break; + } + + case InstructionDescription::G_BSWAP: + { + if (o0->isReg()) + { + const GPReg& dst = reinterpret_cast<const GPReg&>(*o0); + +#if defined(ASMJIT_X64) + _emitRexR(dst.getRegType() == REG_TYPE_GPQ, 1, dst.getRegCode(), forceRexPrefix); +#endif // ASMJIT_X64 + _emitByte(0x0F); + _emitModR(1, dst.getRegCode()); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_BT: + { + if (o0->isRegMem() && o1->isReg()) + { + const Operand& dst = reinterpret_cast<const Operand&>(*o0); + const GPReg& src = reinterpret_cast<const GPReg&>(*o1); + + _emitX86RM(id->opCode[0], + src.isRegType(REG_TYPE_GPW), + src.isRegType(REG_TYPE_GPQ), + src.getRegCode(), + dst, + 0, forceRexPrefix); + _FINISHED(); + } + + if (o0->isRegMem() && o1->isImm()) + { + const Operand& dst = reinterpret_cast<const Operand&>(*o0); + const Imm& src = reinterpret_cast<const Imm&>(*o1); + + _emitX86RM(id->opCode[1], + dst.getSize() == 2, + dst.getSize() == 8, + (uint8_t)id->opCodeR, + dst, + 1, forceRexPrefix); + _FINISHED_IMMEDIATE(o1, 1); + } + + break; + } + + case InstructionDescription::G_CALL: + { + if (o0->isRegTypeMem(REG_TYPE_GPN)) + { + const Operand& dst = reinterpret_cast<const Operand&>(*o0); + _emitX86RM(0xFF, + 0, + 0, 2, dst, + 0, forceRexPrefix); + _FINISHED(); + } + + if (o0->isImm()) + { + const Imm& imm = reinterpret_cast<const Imm&>(*o0); + _emitByte(0xE8); + _emitJmpOrCallReloc(InstructionDescription::G_CALL, (void*)imm.getValue()); + _FINISHED(); + } + + if (o0->isLabel()) + { + LabelData& l_data = _labelData[reinterpret_cast<const Label*>(o0)->getId() & OPERAND_ID_VALUE_MASK]; + + if (l_data.offset != -1) + { + // Bound label. 
+ static const sysint_t rel32_size = 5; + sysint_t offs = l_data.offset - getOffset(); + + ASMJIT_ASSERT(offs <= 0); + + _emitByte(0xE8); + _emitInt32((int32_t)(offs - rel32_size)); + } + else + { + // Non-bound label. + _emitByte(0xE8); + _emitDisplacement(l_data, -4, 4); + } + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_CRC32: + { + if (o0->isReg() && o1->isRegMem()) + { + const GPReg& dst = reinterpret_cast<const GPReg&>(*o0); + const Operand& src = reinterpret_cast<const Operand&>(*o1); + ASMJIT_ASSERT(dst.getRegType() == REG_TYPE_GPD || dst.getRegType() == REG_TYPE_GPQ); + + _emitX86RM(id->opCode[0] + (src.getSize() != 1), + src.getSize() == 2, + dst.getRegType() == 8, dst.getRegCode(), src, + 0, forceRexPrefix); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_ENTER: + { + if (o0->isImm() && o1->isImm()) + { + _emitByte(0xC8); + _emitWord((uint16_t)(sysuint_t)reinterpret_cast<const Imm&>(*o2).getValue()); + _emitByte((uint8_t )(sysuint_t)reinterpret_cast<const Imm&>(*o1).getValue()); + _FINISHED(); + } + break; + } + + case InstructionDescription::G_IMUL: + { + // 1 operand + if (o0->isRegMem() && o1->isNone() && o2->isNone()) + { + const Operand& src = reinterpret_cast<const Operand&>(*o0); + _emitX86RM(0xF6 + (src.getSize() != 1), + src.getSize() == 2, + src.getSize() == 8, 5, src, + 0, forceRexPrefix); + _FINISHED(); + } + // 2 operands + else if (o0->isReg() && !o1->isNone() && o2->isNone()) + { + const GPReg& dst = reinterpret_cast<const GPReg&>(*o0); + ASMJIT_ASSERT(!dst.isRegType(REG_TYPE_GPW)); + + if (o1->isRegMem()) + { + const Operand& src = reinterpret_cast<const Operand&>(*o1); + + _emitX86RM(0x0FAF, + dst.isRegType(REG_TYPE_GPW), + dst.isRegType(REG_TYPE_GPQ), dst.getRegCode(), src, + 0, forceRexPrefix); + _FINISHED(); + } + else if (o1->isImm()) + { + const Imm& imm = reinterpret_cast<const Imm&>(*o1); + + if (Util::isInt8(imm.getValue())) + { + _emitX86RM(0x6B, + dst.isRegType(REG_TYPE_GPW), + 
dst.isRegType(REG_TYPE_GPQ), dst.getRegCode(), dst, + 1, forceRexPrefix); + _FINISHED_IMMEDIATE(&imm, 1); + } + else + { + immSize = dst.isRegType(REG_TYPE_GPW) ? 2 : 4; + _emitX86RM(0x69, + dst.isRegType(REG_TYPE_GPW), + dst.isRegType(REG_TYPE_GPQ), dst.getRegCode(), dst, + immSize, forceRexPrefix); + _FINISHED_IMMEDIATE(&imm, immSize); + } + } + } + // 3 operands + else if (o0->isReg() && o1->isRegMem() && o2->isImm()) + { + const GPReg& dst = reinterpret_cast<const GPReg&>(*o0); + const Operand& src = reinterpret_cast<const Operand&>(*o1); + const Imm& imm = reinterpret_cast<const Imm&>(*o2); + + if (Util::isInt8(imm.getValue())) + { + _emitX86RM(0x6B, + dst.isRegType(REG_TYPE_GPW), + dst.isRegType(REG_TYPE_GPQ), dst.getRegCode(), src, + 1, forceRexPrefix); + _FINISHED_IMMEDIATE(&imm, 1); + } + else + { + immSize = dst.isRegType(REG_TYPE_GPW) ? 2 : 4; + _emitX86RM(0x69, + dst.isRegType(REG_TYPE_GPW), + dst.isRegType(REG_TYPE_GPQ), dst.getRegCode(), src, + immSize, forceRexPrefix); + _FINISHED_IMMEDIATE(&imm, immSize); + } + } + + break; + } + + case InstructionDescription::G_INC_DEC: + { + if (o0->isRegMem()) + { + const Operand& dst = reinterpret_cast<const Operand&>(*o0); + + // INC [r16|r32] in 64-bit mode is not encodable. +#if defined(ASMJIT_X86) + if ((dst.isReg()) && (dst.isRegType(REG_TYPE_GPW) || dst.isRegType(REG_TYPE_GPD))) + { + _emitX86Inl(id->opCode[0], + dst.isRegType(REG_TYPE_GPW), + 0, reinterpret_cast<const BaseReg&>(dst).getRegCode(), + false); + _FINISHED(); + } +#endif // ASMJIT_X86 + + _emitX86RM(id->opCode[1] + (dst.getSize() != 1), + dst.getSize() == 2, + dst.getSize() == 8, (uint8_t)id->opCodeR, dst, + 0, forceRexPrefix); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_J: + { + if (o0->isLabel()) + { + LabelData& l_data = _labelData[reinterpret_cast<const Label*>(o0)->getId() & OPERAND_ID_VALUE_MASK]; + + uint32_t hint = (uint32_t)(o1->isImm() ? 
reinterpret_cast<const Imm&>(*o1).getValue() : 0); + bool isShortJump = (_emitOptions & EMIT_OPTION_SHORT_JUMP) != 0; + + // Emit jump hint if configured for that. + if ((hint & (HINT_TAKEN | HINT_NOT_TAKEN)) && (_properties & (1 << PROPERTY_JUMP_HINTS))) + { + if (hint & HINT_TAKEN) + _emitByte(HINT_BYTE_VALUE_TAKEN); + else if (hint & HINT_NOT_TAKEN) + _emitByte(HINT_BYTE_VALUE_NOT_TAKEN); + } + + if (l_data.offset != -1) + { + // Bound label. + static const sysint_t rel8_size = 2; + static const sysint_t rel32_size = 6; + sysint_t offs = l_data.offset - getOffset(); + + ASMJIT_ASSERT(offs <= 0); + + if (Util::isInt8(offs - rel8_size)) + { + _emitByte(0x70 | (uint8_t)id->opCode[0]); + _emitByte((uint8_t)(int8_t)(offs - rel8_size)); + + // Change the emit options so logger can log instruction correctly. + _emitOptions |= EMIT_OPTION_SHORT_JUMP; + } + else + { + if (isShortJump && _logger) + { + _logger->logString("*** ASSEMBLER WARNING: Emitting long conditional jump, but short jump instruction forced!\n"); + _emitOptions &= ~EMIT_OPTION_SHORT_JUMP; + } + + _emitByte(0x0F); + _emitByte(0x80 | (uint8_t)id->opCode[0]); + _emitInt32((int32_t)(offs - rel32_size)); + } + } + else + { + // Non-bound label. 
+ if (isShortJump) + { + _emitByte(0x70 | (uint8_t)id->opCode[0]); + _emitDisplacement(l_data, -1, 1); + } + else + { + _emitByte(0x0F); + _emitByte(0x80 | (uint8_t)id->opCode[0]); + _emitDisplacement(l_data, -4, 4); + } + } + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_JMP: + { + if (o0->isRegMem()) + { + const Operand& dst = reinterpret_cast<const Operand&>(*o0); + + _emitX86RM(0xFF, + 0, + 0, 4, dst, + 0, forceRexPrefix); + _FINISHED(); + } + + if (o0->isImm()) + { + const Imm& imm = reinterpret_cast<const Imm&>(*o0); + _emitByte(0xE9); + _emitJmpOrCallReloc(InstructionDescription::G_JMP, (void*)imm.getValue()); + _FINISHED(); + } + + if (o0->isLabel()) + { + LabelData& l_data = _labelData[reinterpret_cast<const Label*>(o0)->getId() & OPERAND_ID_VALUE_MASK]; + bool isShortJump = (_emitOptions & EMIT_OPTION_SHORT_JUMP) != 0; + + if (l_data.offset != -1) + { + // Bound label. + const sysint_t rel8_size = 2; + const sysint_t rel32_size = 5; + sysint_t offs = l_data.offset - getOffset(); + + if (Util::isInt8(offs - rel8_size)) + { + _emitByte(0xEB); + _emitByte((uint8_t)(int8_t)(offs - rel8_size)); + + // Change the emit options so logger can log instruction correctly. + _emitOptions |= EMIT_OPTION_SHORT_JUMP; + } + else + { + if (isShortJump) + { + if (_logger) + { + _logger->logString("*** ASSEMBLER WARNING: Emitting long jump, but short jump instruction forced!\n"); + _emitOptions &= ~EMIT_OPTION_SHORT_JUMP; + } + } + + _emitByte(0xE9); + _emitInt32((int32_t)(offs - rel32_size)); + } + } + else + { + // Non-bound label. 
+ if (isShortJump) + { + _emitByte(0xEB); + _emitDisplacement(l_data, -1, 1); + } + else + { + _emitByte(0xE9); + _emitDisplacement(l_data, -4, 4); + } + } + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_LEA: + { + if (o0->isReg() && o1->isMem()) + { + const GPReg& dst = reinterpret_cast<const GPReg&>(*o0); + const Mem& src = reinterpret_cast<const Mem&>(*o1); + _emitX86RM(0x8D, + dst.isRegType(REG_TYPE_GPW), + dst.isRegType(REG_TYPE_GPQ), dst.getRegCode(), src, + 0, forceRexPrefix); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_M: + { + if (o0->isMem()) + { + _emitX86RM(id->opCode[0], 0, (uint8_t)id->opCode[1], (uint8_t)id->opCodeR, reinterpret_cast<const Mem&>(*o0), 0, forceRexPrefix); + _FINISHED(); + } + break; + } + + case InstructionDescription::G_MOV: + { + const Operand& dst = *o0; + const Operand& src = *o1; + + switch (dst.getType() << 4 | src.getType()) + { + // Reg <- Reg/Mem + case (OPERAND_REG << 4) | OPERAND_REG: + { + ASMJIT_ASSERT(src.isRegType(REG_TYPE_GPB_LO) || + src.isRegType(REG_TYPE_GPB_HI) || + src.isRegType(REG_TYPE_GPW ) || + src.isRegType(REG_TYPE_GPD ) || + src.isRegType(REG_TYPE_GPQ ) ); + // ... fall through ... + } + case (OPERAND_REG << 4) | OPERAND_MEM: + { + ASMJIT_ASSERT(dst.isRegType(REG_TYPE_GPB_LO) || + dst.isRegType(REG_TYPE_GPB_HI) || + dst.isRegType(REG_TYPE_GPW ) || + dst.isRegType(REG_TYPE_GPD ) || + dst.isRegType(REG_TYPE_GPQ ) ); + + _emitX86RM(0x0000008A + (dst.getSize() != 1), + dst.isRegType(REG_TYPE_GPW), + dst.isRegType(REG_TYPE_GPQ), + reinterpret_cast<const GPReg&>(dst).getRegCode(), + reinterpret_cast<const Operand&>(src), + 0, forceRexPrefix); + _FINISHED(); + } + + // Reg <- Imm + case (OPERAND_REG << 4) | OPERAND_IMM: + { + const GPReg& dst = reinterpret_cast<const GPReg&>(*o0); + const Imm& src = reinterpret_cast<const Imm&>(*o1); + + // In 64-bit mode the immediate can be 64-bits long if the + // destination operand type is register (otherwise 32-bits). 
+ immSize = dst.getSize(); + +#if defined(ASMJIT_X64) + // Optimize instruction size by using 32-bit immediate if value can + // fit into it. + if (immSize == 8 && Util::isInt32(src.getValue())) + { + _emitX86RM(0xC7, + 0, // 16BIT + 1, // REX.W + 0, // O + dst, + 0, forceRexPrefix); + immSize = 4; + } + else + { +#endif // ASMJIT_X64 + _emitX86Inl((dst.getSize() == 1 ? 0xB0 : 0xB8), + dst.isRegType(REG_TYPE_GPW), + dst.isRegType(REG_TYPE_GPQ), + dst.getRegCode(), forceRexPrefix); +#if defined(ASMJIT_X64) + } +#endif // ASMJIT_X64 + + _FINISHED_IMMEDIATE(&src, immSize); + } + + // Mem <- Reg + case (OPERAND_MEM << 4) | OPERAND_REG: + { + ASMJIT_ASSERT(src.isRegType(REG_TYPE_GPB_LO) || + src.isRegType(REG_TYPE_GPB_HI) || + src.isRegType(REG_TYPE_GPW ) || + src.isRegType(REG_TYPE_GPD ) || + src.isRegType(REG_TYPE_GPQ ) ); + + _emitX86RM(0x88 + (src.getSize() != 1), + src.isRegType(REG_TYPE_GPW), + src.isRegType(REG_TYPE_GPQ), + reinterpret_cast<const GPReg&>(src).getRegCode(), + reinterpret_cast<const Operand&>(dst), + 0, forceRexPrefix); + _FINISHED(); + } + + // Mem <- Imm + case (OPERAND_MEM << 4) | OPERAND_IMM: + { + immSize = dst.getSize() <= 4 ? dst.getSize() : 4; + + _emitX86RM(0xC6 + (dst.getSize() != 1), + dst.getSize() == 2, + dst.getSize() == 8, + 0, + reinterpret_cast<const Operand&>(dst), + immSize, forceRexPrefix); + _FINISHED_IMMEDIATE(&src, immSize); + } + } + + break; + } + + case InstructionDescription::G_MOV_PTR: + { + if ((o0->isReg() && o1->isImm()) || (o0->isImm() && o1->isReg())) + { + bool reverse = o1->getType() == OPERAND_REG; + uint8_t opCode = !reverse ? 0xA0 : 0xA2; + const GPReg& reg = reinterpret_cast<const GPReg&>(!reverse ? *o0 : *o1); + const Imm& imm = reinterpret_cast<const Imm&>(!reverse ? 
*o1 : *o0); + + if (reg.getRegIndex() != 0) goto illegalInstruction; + + if (reg.isRegType(REG_TYPE_GPW)) _emitByte(0x66); +#if defined(ASMJIT_X64) + _emitRexR(reg.getSize() == 8, 0, 0, forceRexPrefix); +#endif // ASMJIT_X64 + _emitByte(opCode + (reg.getSize() != 1)); + _FINISHED_IMMEDIATE(&imm, sizeof(sysint_t)); + } + + break; + } + + case InstructionDescription::G_MOVSX_MOVZX: + { + if (o0->isReg() && o1->isRegMem()) + { + const GPReg& dst = reinterpret_cast<const GPReg&>(*o0); + const Operand& src = reinterpret_cast<const Operand&>(*o1); + + if (dst.getSize() == 1) goto illegalInstruction; + if (src.getSize() != 1 && src.getSize() != 2) goto illegalInstruction; + if (src.getSize() == 2 && dst.getSize() == 2) goto illegalInstruction; + + _emitX86RM(id->opCode[0] + (src.getSize() != 1), + dst.isRegType(REG_TYPE_GPW), + dst.isRegType(REG_TYPE_GPQ), + dst.getRegCode(), + src, + 0, forceRexPrefix); + _FINISHED(); + } + + break; + } + +#if defined(ASMJIT_X64) + case InstructionDescription::G_MOVSXD: + { + if (o0->isReg() && o1->isRegMem()) + { + const GPReg& dst = reinterpret_cast<const GPReg&>(*o0); + const Operand& src = reinterpret_cast<const Operand&>(*o1); + _emitX86RM(0x00000063, + 0, + 1, dst.getRegCode(), src, + 0, forceRexPrefix); + _FINISHED(); + } + + break; + } +#endif // ASMJIT_X64 + + case InstructionDescription::G_PUSH: + { + // This section is only for immediates, memory/register operands are handled in G_POP. + if (o0->isImm()) + { + const Imm& imm = reinterpret_cast<const Imm&>(*o0); + + if (Util::isInt8(imm.getValue())) + { + _emitByte(0x6A); + _FINISHED_IMMEDIATE(&imm, 1); + } + else + { + _emitByte(0x68); + _FINISHED_IMMEDIATE(&imm, 4); + } + } + + // ... goto G_POP ... 
+ } + + case InstructionDescription::G_POP: + { + if (o0->isReg()) + { + ASMJIT_ASSERT(o0->isRegType(REG_TYPE_GPW) || o0->isRegType(REG_TYPE_GPN)); + _emitX86Inl(id->opCode[0], o0->isRegType(REG_TYPE_GPW), 0, reinterpret_cast<const GPReg&>(*o0).getRegCode(), forceRexPrefix); + _FINISHED(); + } + + if (o0->isMem()) + { + _emitX86RM(id->opCode[1], o0->getSize() == 2, 0, (uint8_t)id->opCodeR, reinterpret_cast<const Operand&>(*o0), 0, forceRexPrefix); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_R_RM: + { + if (o0->isReg() && o1->isRegMem()) + { + const GPReg& dst = reinterpret_cast<const GPReg&>(*o0); + const Operand& src = reinterpret_cast<const Operand&>(*o1); + ASMJIT_ASSERT(dst.getSize() != 1); + + _emitX86RM(id->opCode[0], + dst.getRegType() == REG_TYPE_GPW, + dst.getRegType() == REG_TYPE_GPQ, dst.getRegCode(), src, + 0, forceRexPrefix); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_RM_B: + { + if (o0->isRegMem()) + { + const Operand& op = reinterpret_cast<const Operand&>(*o0); + + // Only BYTE register or BYTE/TYPELESS memory location can be used. 
+ ASMJIT_ASSERT(op.getSize() <= 1); + + _emitX86RM(id->opCode[0], false, false, 0, op, 0, forceRexPrefix); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_RM: + { + if (o0->isRegMem()) + { + const Operand& op = reinterpret_cast<const Operand&>(*o0); + _emitX86RM(id->opCode[0] + (op.getSize() != 1), + op.getSize() == 2, + op.getSize() == 8, (uint8_t)id->opCodeR, op, + 0, forceRexPrefix); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_RM_R: + { + if (o0->isRegMem() && o1->isReg()) + { + const Operand& dst = reinterpret_cast<const Operand&>(*o0); + const GPReg& src = reinterpret_cast<const GPReg&>(*o1); + _emitX86RM(id->opCode[0] + (src.getSize() != 1), + src.getRegType() == REG_TYPE_GPW, + src.getRegType() == REG_TYPE_GPQ, src.getRegCode(), dst, + 0, forceRexPrefix); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_REP: + { + uint32_t opCode = id->opCode[0]; + uint32_t opSize = id->opCode[1]; + + // Emit REP prefix (1 BYTE). + _emitByte(opCode >> 24); + + if (opSize != 1) opCode++; // D, Q and W form. + if (opSize == 2) _emitByte(0x66); // 16-bit prefix. +#if defined(ASMJIT_X64) + else if (opSize == 8) _emitByte(0x48); // REX.W prefix. +#endif // ASMJIT_X64 + + // Emit opcode (1 BYTE). + _emitByte(opCode & 0xFF); + _FINISHED(); + } + + case InstructionDescription::G_RET: + { + if (o0->isNone()) + { + _emitByte(0xC3); + _FINISHED(); + } + else if (o0->isImm()) + { + const Imm& imm = reinterpret_cast<const Imm&>(*o0); + ASMJIT_ASSERT(Util::isUInt16(imm.getValue())); + + if (imm.getValue() == 0) + { + _emitByte(0xC3); + _FINISHED(); + } + else + { + _emitByte(0xC2); + _FINISHED_IMMEDIATE(&imm, 2); + } + } + + break; + } + + case InstructionDescription::G_ROT: + { + if (o0->isRegMem() && (o1->isRegCode(REG_CL) || o1->isImm())) + { + // generate opcode. For these operations is base 0xC0 or 0xD0. + bool useImm8 = o1->isImm() && reinterpret_cast<const Imm&>(*o1).getValue() != 1; + uint32_t opCode = useImm8 ? 
0xC0 : 0xD0; + + // size and operand type modifies the opcode + if (o0->getSize() != 1) opCode |= 0x01; + if (o1->getType() == OPERAND_REG) opCode |= 0x02; + + _emitX86RM(opCode, + o0->getSize() == 2, + o0->getSize() == 8, + (uint8_t)id->opCodeR, reinterpret_cast<const Operand&>(*o0), + useImm8 ? 1 : 0, forceRexPrefix); + + if (useImm8) + _FINISHED_IMMEDIATE(o1, 1); + else + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_SHLD_SHRD: + { + if (o0->isRegMem() && o1->isReg() && (o2->isImm() || (o2->isReg() && o2->isRegCode(REG_CL)))) + { + const Operand& dst = reinterpret_cast<const Operand&>(*o0); + const GPReg& src1 = reinterpret_cast<const GPReg&>(*o1); + const Operand& src2 = reinterpret_cast<const Operand&>(*o2); + + ASMJIT_ASSERT(dst.getSize() == src1.getSize()); + + _emitX86RM(id->opCode[0] + src2.isReg(), + src1.isRegType(REG_TYPE_GPW), + src1.isRegType(REG_TYPE_GPQ), + src1.getRegCode(), dst, + src2.isImm() ? 1 : 0, forceRexPrefix); + if (src2.isImm()) + _FINISHED_IMMEDIATE(&src2, 1); + else + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_TEST: + { + if (o0->isRegMem() && o1->isReg()) + { + ASMJIT_ASSERT(o0->getSize() == o1->getSize()); + _emitX86RM(0x84 + (o1->getSize() != 1), + o1->getSize() == 2, o1->getSize() == 8, + reinterpret_cast<const BaseReg&>(*o1).getRegCode(), + reinterpret_cast<const Operand&>(*o0), + 0, forceRexPrefix); + _FINISHED(); + } + + if (o0->isRegIndex(0) && o1->isImm()) + { + immSize = o0->getSize() <= 4 ? o0->getSize() : 4; + + if (o0->getSize() == 2) _emitByte(0x66); // 16-bit. +#if defined(ASMJIT_X64) + _emitRexRM(o0->getSize() == 8, 0, reinterpret_cast<const Operand&>(*o0), forceRexPrefix); +#endif // ASMJIT_X64 + _emitByte(0xA8 + (o0->getSize() != 1)); + _FINISHED_IMMEDIATE(o1, immSize); + } + + if (o0->isRegMem() && o1->isImm()) + { + immSize = o0->getSize() <= 4 ? o0->getSize() : 4; + + if (o0->getSize() == 2) _emitByte(0x66); // 16-bit. 
+ _emitSegmentPrefix(reinterpret_cast<const Operand&>(*o0)); // Segment prefix. +#if defined(ASMJIT_X64) + _emitRexRM(o0->getSize() == 8, 0, reinterpret_cast<const Operand&>(*o0), forceRexPrefix); +#endif // ASMJIT_X64 + _emitByte(0xF6 + (o0->getSize() != 1)); + _emitModRM(0, reinterpret_cast<const Operand&>(*o0), immSize); + _FINISHED_IMMEDIATE(o1, immSize); + } + + break; + } + + case InstructionDescription::G_XCHG: + { + if (o0->isRegMem() && o1->isReg()) + { + const Operand& dst = reinterpret_cast<const Operand&>(*o0); + const GPReg& src = reinterpret_cast<const GPReg&>(*o1); + + if (src.isRegType(REG_TYPE_GPW)) _emitByte(0x66); // 16-bit. + _emitSegmentPrefix(dst); // segment prefix +#if defined(ASMJIT_X64) + _emitRexRM(src.isRegType(REG_TYPE_GPQ), src.getRegCode(), dst, forceRexPrefix); +#endif // ASMJIT_X64 + + // Special opcode for index 0 registers (AX, EAX, RAX vs register). + if ((dst.getType() == OPERAND_REG && dst.getSize() > 1) && + (reinterpret_cast<const GPReg&>(dst).getRegCode() == 0 || + reinterpret_cast<const GPReg&>(src).getRegCode() == 0 )) + { + uint8_t index = reinterpret_cast<const GPReg&>(dst).getRegCode() | src.getRegCode(); + _emitByte(0x90 + index); + _FINISHED(); + } + + _emitByte(0x86 + (src.getSize() != 1)); + _emitModRM(src.getRegCode(), dst, 0); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_MOVBE: + { + if (o0->isReg() && o1->isMem()) + { + _emitX86RM(0x000F38F0, + o0->isRegType(REG_TYPE_GPW), + o0->isRegType(REG_TYPE_GPQ), + reinterpret_cast<const GPReg&>(*o0).getRegCode(), + reinterpret_cast<const Mem&>(*o1), + 0, forceRexPrefix); + _FINISHED(); + } + + if (o0->isMem() && o1->isReg()) + { + _emitX86RM(0x000F38F1, + o1->isRegType(REG_TYPE_GPW), + o1->isRegType(REG_TYPE_GPQ), + reinterpret_cast<const GPReg&>(*o1).getRegCode(), + reinterpret_cast<const Mem&>(*o0), + 0, forceRexPrefix); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_X87_FPU: + { + if (o0->isRegType(REG_TYPE_X87)) + { + 
uint8_t i1 = reinterpret_cast<const X87Reg&>(*o0).getRegIndex(); + uint8_t i2 = 0; + + if (code != INST_FCOM && code != INST_FCOMP) + { + if (!o1->isRegType(REG_TYPE_X87)) goto illegalInstruction; + i2 = reinterpret_cast<const X87Reg&>(*o1).getRegIndex(); + } + else if (i1 != 0 && i2 != 0) + { + goto illegalInstruction; + } + + _emitByte(i1 == 0 + ? ((id->opCode[0] & 0xFF000000) >> 24) + : ((id->opCode[0] & 0x00FF0000) >> 16)); + _emitByte(i1 == 0 + ? ((id->opCode[0] & 0x0000FF00) >> 8) + i2 + : ((id->opCode[0] & 0x000000FF) ) + i1); + _FINISHED(); + } + + if (o0->isMem() && (o0->getSize() == 4 || o0->getSize() == 8) && o1->isNone()) + { + const Mem& m = reinterpret_cast<const Mem&>(*o0); + + // segment prefix + _emitSegmentPrefix(m); + + _emitByte(o0->getSize() == 4 + ? ((id->opCode[0] & 0xFF000000) >> 24) + : ((id->opCode[0] & 0x00FF0000) >> 16)); + _emitModM((uint8_t)id->opCodeR, m, 0); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_X87_STI: + { + if (o0->isRegType(REG_TYPE_X87)) + { + uint8_t i = reinterpret_cast<const X87Reg&>(*o0).getRegIndex(); + _emitByte((uint8_t)((id->opCode[0] & 0x0000FF00) >> 8)); + _emitByte((uint8_t)((id->opCode[0] & 0x000000FF) + i)); + _FINISHED(); + } + break; + } + + case InstructionDescription::G_X87_FSTSW: + { + if (o0->isReg() && + reinterpret_cast<const BaseReg&>(*o0).getRegType() <= REG_TYPE_GPQ && + reinterpret_cast<const BaseReg&>(*o0).getRegIndex() == 0) + { + _emitOpCode(id->opCode[1]); + _FINISHED(); + } + + if (o0->isMem()) + { + _emitX86RM(id->opCode[0], 0, 0, (uint8_t)id->opCodeR, reinterpret_cast<const Mem&>(*o0), 0, forceRexPrefix); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_X87_MEM_STI: + { + if (o0->isRegType(REG_TYPE_X87)) + { + _emitByte((uint8_t)((id->opCode[1] & 0xFF000000) >> 24)); + _emitByte((uint8_t)((id->opCode[1] & 0x00FF0000) >> 16) + + reinterpret_cast<const X87Reg&>(*o0).getRegIndex()); + _FINISHED(); + } + + // ... fall through to G_X87_MEM ... 
+ } + + case InstructionDescription::G_X87_MEM: + { + if (!o0->isMem()) goto illegalInstruction; + const Mem& m = reinterpret_cast<const Mem&>(*o0); + + uint8_t opCode = 0x00, mod = 0; + + if (o0->getSize() == 2 && (id->oflags[0] & InstructionDescription::O_FM_2)) + { + opCode = (uint8_t)((id->opCode[0] & 0xFF000000) >> 24); + mod = (uint8_t)id->opCodeR; + } + if (o0->getSize() == 4 && (id->oflags[0] & InstructionDescription::O_FM_4)) + { + opCode = (uint8_t)((id->opCode[0] & 0x00FF0000) >> 16); + mod = (uint8_t)id->opCodeR; + } + if (o0->getSize() == 8 && (id->oflags[0] & InstructionDescription::O_FM_8)) + { + opCode = (uint8_t)((id->opCode[0] & 0x0000FF00) >> 8); + mod = (uint8_t)((id->opCode[0] & 0x000000FF) ); + } + + if (opCode) + { + _emitSegmentPrefix(m); + _emitByte(opCode); + _emitModM(mod, m, 0); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_MMU_MOV: + { + ASMJIT_ASSERT(id->oflags[0] != 0); + ASMJIT_ASSERT(id->oflags[1] != 0); + + // Check parameters (X)MM|GP32_64 <- (X)MM|GP32_64|Mem|Imm + if ((o0->isMem() && (id->oflags[0] & InstructionDescription::O_MEM) == 0) || + (o0->isRegType(REG_TYPE_MM ) && (id->oflags[0] & InstructionDescription::O_MM ) == 0) || + (o0->isRegType(REG_TYPE_XMM) && (id->oflags[0] & InstructionDescription::O_XMM) == 0) || + (o0->isRegType(REG_TYPE_GPD) && (id->oflags[0] & InstructionDescription::O_GD ) == 0) || + (o0->isRegType(REG_TYPE_GPQ) && (id->oflags[0] & InstructionDescription::O_GQ ) == 0) || + (o1->isRegType(REG_TYPE_MM ) && (id->oflags[1] & InstructionDescription::O_MM ) == 0) || + (o1->isRegType(REG_TYPE_XMM) && (id->oflags[1] & InstructionDescription::O_XMM) == 0) || + (o1->isRegType(REG_TYPE_GPD) && (id->oflags[1] & InstructionDescription::O_GD ) == 0) || + (o1->isRegType(REG_TYPE_GPQ) && (id->oflags[1] & InstructionDescription::O_GQ ) == 0) || + (o1->isMem() && (id->oflags[1] & InstructionDescription::O_MEM) == 0) ) + { + goto illegalInstruction; + } + + // Illegal. 
+ if (o0->isMem() && o1->isMem()) goto illegalInstruction; + + uint8_t rexw = ((id->oflags[0]|id->oflags[1]) & InstructionDescription::O_NOREX) + ? 0 + : o0->isRegType(REG_TYPE_GPQ) | o0->isRegType(REG_TYPE_GPQ); + + // (X)MM|Reg <- (X)MM|Reg + if (o0->isReg() && o1->isReg()) + { + _emitMmu(id->opCode[0], rexw, + reinterpret_cast<const BaseReg&>(*o0).getRegCode(), + reinterpret_cast<const BaseReg&>(*o1), + 0); + _FINISHED(); + } + + // (X)MM|Reg <- Mem + if (o0->isReg() && o1->isMem()) + { + _emitMmu(id->opCode[0], rexw, + reinterpret_cast<const BaseReg&>(*o0).getRegCode(), + reinterpret_cast<const Mem&>(*o1), + 0); + _FINISHED(); + } + + // Mem <- (X)MM|Reg + if (o0->isMem() && o1->isReg()) + { + _emitMmu(id->opCode[1], rexw, + reinterpret_cast<const BaseReg&>(*o1).getRegCode(), + reinterpret_cast<const Mem&>(*o0), + 0); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_MMU_MOVD: + { + if ((o0->isRegType(REG_TYPE_MM) || o0->isRegType(REG_TYPE_XMM)) && (o1->isRegType(REG_TYPE_GPD) || o1->isMem())) + { + _emitMmu(o0->isRegType(REG_TYPE_XMM) ? 0x66000F6E : 0x00000F6E, 0, + reinterpret_cast<const BaseReg&>(*o0).getRegCode(), + reinterpret_cast<const Operand&>(*o1), + 0); + _FINISHED(); + } + + if ((o0->isRegType(REG_TYPE_GPD) || o0->isMem()) && (o1->isRegType(REG_TYPE_MM) || o1->isRegType(REG_TYPE_XMM))) + { + _emitMmu(o1->isRegType(REG_TYPE_XMM) ? 
0x66000F7E : 0x00000F7E, 0, + reinterpret_cast<const BaseReg&>(*o1).getRegCode(), + reinterpret_cast<const Operand&>(*o0), + 0); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_MMU_MOVQ: + { + if (o0->isRegType(REG_TYPE_MM) && o1->isRegType(REG_TYPE_MM)) + { + _emitMmu(0x00000F6F, 0, + reinterpret_cast<const MMReg&>(*o0).getRegCode(), + reinterpret_cast<const MMReg&>(*o1), + 0); + _FINISHED(); + } + + if (o0->isRegType(REG_TYPE_XMM) && o1->isRegType(REG_TYPE_XMM)) + { + _emitMmu(0xF3000F7E, 0, + reinterpret_cast<const XMMReg&>(*o0).getRegCode(), + reinterpret_cast<const XMMReg&>(*o1), + 0); + _FINISHED(); + } + + // Convenience - movdq2q + if (o0->isRegType(REG_TYPE_MM) && o1->isRegType(REG_TYPE_XMM)) + { + _emitMmu(0xF2000FD6, 0, + reinterpret_cast<const MMReg&>(*o0).getRegCode(), + reinterpret_cast<const XMMReg&>(*o1), + 0); + _FINISHED(); + } + + // Convenience - movq2dq + if (o0->isRegType(REG_TYPE_XMM) && o1->isRegType(REG_TYPE_MM)) + { + _emitMmu(0xF3000FD6, 0, + reinterpret_cast<const XMMReg&>(*o0).getRegCode(), + reinterpret_cast<const MMReg&>(*o1), + 0); + _FINISHED(); + } + + if (o0->isRegType(REG_TYPE_MM) && o1->isMem()) + { + _emitMmu(0x00000F6F, 0, + reinterpret_cast<const MMReg&>(*o0).getRegCode(), + reinterpret_cast<const Mem&>(*o1), + 0); + _FINISHED(); + } + + if (o0->isRegType(REG_TYPE_XMM) && o1->isMem()) + { + _emitMmu(0xF3000F7E, 0, + reinterpret_cast<const XMMReg&>(*o0).getRegCode(), + reinterpret_cast<const Mem&>(*o1), + 0); + _FINISHED(); + } + + if (o0->isMem() && o1->isRegType(REG_TYPE_MM)) + { + _emitMmu(0x00000F7F, 0, + reinterpret_cast<const MMReg&>(*o1).getRegCode(), + reinterpret_cast<const Mem&>(*o0), + 0); + _FINISHED(); + } + + if (o0->isMem() && o1->isRegType(REG_TYPE_XMM)) + { + _emitMmu(0x66000FD6, 0, + reinterpret_cast<const XMMReg&>(*o1).getRegCode(), + reinterpret_cast<const Mem&>(*o0), + 0); + _FINISHED(); + } + +#if defined(ASMJIT_X64) + if ((o0->isRegType(REG_TYPE_MM) || o0->isRegType(REG_TYPE_XMM)) && 
(o1->isRegType(REG_TYPE_GPQ) || o1->isMem())) + { + _emitMmu(o0->isRegType(REG_TYPE_XMM) ? 0x66000F6E : 0x00000F6E, 1, + reinterpret_cast<const BaseReg&>(*o0).getRegCode(), + reinterpret_cast<const Operand&>(*o1), + 0); + _FINISHED(); + } + + if ((o0->isRegType(REG_TYPE_GPQ) || o0->isMem()) && (o1->isRegType(REG_TYPE_MM) || o1->isRegType(REG_TYPE_XMM))) + { + _emitMmu(o1->isRegType(REG_TYPE_XMM) ? 0x66000F7E : 0x00000F7E, 1, + reinterpret_cast<const BaseReg&>(*o1).getRegCode(), + reinterpret_cast<const Operand&>(*o0), + 0); + _FINISHED(); + } +#endif // ASMJIT_X64 + + break; + } + + case InstructionDescription::G_MMU_PREFETCH: + { + if (o0->isMem() && o1->isImm()) + { + const Mem& mem = reinterpret_cast<const Mem&>(*o0); + const Imm& hint = reinterpret_cast<const Imm&>(*o1); + + _emitMmu(0x00000F18, 0, (uint8_t)hint.getValue(), mem, 0); + _FINISHED(); + } + + break; + } + + case InstructionDescription::G_MMU_PEXTR: + { + if (!(o0->isRegMem() && + (o1->isRegType(REG_TYPE_XMM) || (code == INST_PEXTRW && o1->isRegType(REG_TYPE_MM))) && + o2->isImm())) + { + goto illegalInstruction; + } + + uint32_t opCode = id->opCode[0]; + uint8_t isGpdGpq = o0->isRegType(REG_TYPE_GPD) | o0->isRegType(REG_TYPE_GPQ); + + if (code == INST_PEXTRB && (o0->getSize() != 0 && o0->getSize() != 1) && !isGpdGpq) goto illegalInstruction; + if (code == INST_PEXTRW && (o0->getSize() != 0 && o0->getSize() != 2) && !isGpdGpq) goto illegalInstruction; + if (code == INST_PEXTRD && (o0->getSize() != 0 && o0->getSize() != 4) && !isGpdGpq) goto illegalInstruction; + if (code == INST_PEXTRQ && (o0->getSize() != 0 && o0->getSize() != 8) && !isGpdGpq) goto illegalInstruction; + + if (o1->isRegType(REG_TYPE_XMM)) opCode |= 0x66000000; + + if (o0->isReg()) + { + _emitMmu(opCode, id->opCodeR | (uint8_t)o0->isRegType(REG_TYPE_GPQ), + reinterpret_cast<const BaseReg&>(*o1).getRegCode(), + reinterpret_cast<const BaseReg&>(*o0), 1); + _FINISHED_IMMEDIATE(o2, 1); + } + + if (o0->isMem()) + { + _emitMmu(opCode, 
(uint8_t)id->opCodeR, + reinterpret_cast<const BaseReg&>(*o1).getRegCode(), + reinterpret_cast<const Mem&>(*o0), 1); + _FINISHED_IMMEDIATE(o2, 1); + } + + break; + } + + case InstructionDescription::G_MMU_RMI: + { + ASMJIT_ASSERT(id->oflags[0] != 0); + ASMJIT_ASSERT(id->oflags[1] != 0); + + // Check parameters (X)MM|GP32_64 <- (X)MM|GP32_64|Mem|Imm + if (!o0->isReg() || + (o0->isRegType(REG_TYPE_MM ) && (id->oflags[0] & InstructionDescription::O_MM ) == 0) || + (o0->isRegType(REG_TYPE_XMM) && (id->oflags[0] & InstructionDescription::O_XMM) == 0) || + (o0->isRegType(REG_TYPE_GPD) && (id->oflags[0] & InstructionDescription::O_GD ) == 0) || + (o0->isRegType(REG_TYPE_GPQ) && (id->oflags[0] & InstructionDescription::O_GQ ) == 0) || + (o1->isRegType(REG_TYPE_MM ) && (id->oflags[1] & InstructionDescription::O_MM ) == 0) || + (o1->isRegType(REG_TYPE_XMM) && (id->oflags[1] & InstructionDescription::O_XMM) == 0) || + (o1->isRegType(REG_TYPE_GPD) && (id->oflags[1] & InstructionDescription::O_GD ) == 0) || + (o1->isRegType(REG_TYPE_GPQ) && (id->oflags[1] & InstructionDescription::O_GQ ) == 0) || + (o1->isMem() && (id->oflags[1] & InstructionDescription::O_MEM) == 0) || + (o1->isImm() && (id->oflags[1] & InstructionDescription::O_IMM) == 0)) + { + goto illegalInstruction; + } + + uint32_t prefix = + ((id->oflags[0] & InstructionDescription::O_MM_XMM) == InstructionDescription::O_MM_XMM && o0->isRegType(REG_TYPE_XMM)) || + ((id->oflags[1] & InstructionDescription::O_MM_XMM) == InstructionDescription::O_MM_XMM && o1->isRegType(REG_TYPE_XMM)) + ? 0x66000000 + : 0x00000000; + uint8_t rexw = ((id->oflags[0]|id->oflags[1]) & InstructionDescription::O_NOREX) + ? 
0 + : o0->isRegType(REG_TYPE_GPQ) | o0->isRegType(REG_TYPE_GPQ); + + // (X)MM <- (X)MM (opcode0) + if (o1->isReg()) + { + if ((id->oflags[1] & (InstructionDescription::O_MM_XMM | InstructionDescription::O_GQD)) == 0) goto illegalInstruction; + _emitMmu(id->opCode[0] | prefix, rexw, + reinterpret_cast<const BaseReg&>(*o0).getRegCode(), + reinterpret_cast<const BaseReg&>(*o1), 0); + _FINISHED(); + } + // (X)MM <- Mem (opcode0) + if (o1->isMem()) + { + if ((id->oflags[1] & InstructionDescription::O_MEM) == 0) goto illegalInstruction; + _emitMmu(id->opCode[0] | prefix, rexw, + reinterpret_cast<const BaseReg&>(*o0).getRegCode(), + reinterpret_cast<const Mem&>(*o1), 0); + _FINISHED(); + } + // (X)MM <- Imm (opcode1+opcodeR) + if (o1->isImm()) + { + if ((id->oflags[1] & InstructionDescription::O_IMM) == 0) goto illegalInstruction; + _emitMmu(id->opCode[1] | prefix, rexw, + (uint8_t)id->opCodeR, + reinterpret_cast<const BaseReg&>(*o0), 1); + _FINISHED_IMMEDIATE(o1, 1); + } + + break; + } + + case InstructionDescription::G_MMU_RM_IMM8: + { + ASMJIT_ASSERT(id->oflags[0] != 0); + ASMJIT_ASSERT(id->oflags[1] != 0); + + // Check parameters (X)MM|GP32_64 <- (X)MM|GP32_64|Mem|Imm + if (!o0->isReg() || + (o0->isRegType(REG_TYPE_MM ) && (id->oflags[0] & InstructionDescription::O_MM ) == 0) || + (o0->isRegType(REG_TYPE_XMM) && (id->oflags[0] & InstructionDescription::O_XMM) == 0) || + (o0->isRegType(REG_TYPE_GPD) && (id->oflags[0] & InstructionDescription::O_GD ) == 0) || + (o0->isRegType(REG_TYPE_GPQ) && (id->oflags[0] & InstructionDescription::O_GQ ) == 0) || + (o1->isRegType(REG_TYPE_MM ) && (id->oflags[1] & InstructionDescription::O_MM ) == 0) || + (o1->isRegType(REG_TYPE_XMM) && (id->oflags[1] & InstructionDescription::O_XMM) == 0) || + (o1->isRegType(REG_TYPE_GPD) && (id->oflags[1] & InstructionDescription::O_GD ) == 0) || + (o1->isRegType(REG_TYPE_GPQ) && (id->oflags[1] & InstructionDescription::O_GQ ) == 0) || + (o1->isMem() && (id->oflags[1] & InstructionDescription::O_MEM) 
== 0) || + !o2->isImm()) + { + goto illegalInstruction; + } + + uint32_t prefix = + ((id->oflags[0] & InstructionDescription::O_MM_XMM) == InstructionDescription::O_MM_XMM && o0->isRegType(REG_TYPE_XMM)) || + ((id->oflags[1] & InstructionDescription::O_MM_XMM) == InstructionDescription::O_MM_XMM && o1->isRegType(REG_TYPE_XMM)) + ? 0x66000000 + : 0x00000000; + uint8_t rexw = ((id->oflags[0]|id->oflags[1]) & InstructionDescription::O_NOREX) + ? 0 + : o0->isRegType(REG_TYPE_GPQ) | o0->isRegType(REG_TYPE_GPQ); + + // (X)MM <- (X)MM (opcode0) + if (o1->isReg()) + { + if ((id->oflags[1] & (InstructionDescription::O_MM_XMM | InstructionDescription::O_GQD)) == 0) goto illegalInstruction; + _emitMmu(id->opCode[0] | prefix, rexw, + reinterpret_cast<const BaseReg&>(*o0).getRegCode(), + reinterpret_cast<const BaseReg&>(*o1), 1); + _FINISHED_IMMEDIATE(o2, 1); + } + // (X)MM <- Mem (opcode0) + if (o1->isMem()) + { + if ((id->oflags[1] & InstructionDescription::O_MEM) == 0) goto illegalInstruction; + _emitMmu(id->opCode[0] | prefix, rexw, + reinterpret_cast<const BaseReg&>(*o0).getRegCode(), + reinterpret_cast<const Mem&>(*o1), 1); + _FINISHED_IMMEDIATE(o2, 1); + } + + break; + } + + case InstructionDescription::G_MMU_RM_3DNOW: + { + if (o0->isRegType(REG_TYPE_MM) && (o1->isRegType(REG_TYPE_MM) || o1->isMem())) + { + _emitMmu(id->opCode[0], 0, + reinterpret_cast<const BaseReg&>(*o0).getRegCode(), + reinterpret_cast<const Mem&>(*o1), 1); + _emitByte((uint8_t)id->opCode[1]); + _FINISHED(); + } + + break; + } + } + +illegalInstruction: + // Set an error. If we run in release mode assertion will be not used, so we + // must inform about invalid state. 
+ setError(ERROR_ILLEGAL_INSTRUCTION); + +#if defined(ASMJIT_DEBUG) + assertIllegal = true; +#endif // ASMJIT_DEBUG + goto end; + +emitImmediate: + { + sysint_t value = immOperand->getValue(); + switch (immSize) + { + case 1: _emitByte ((uint8_t )(sysuint_t)value); break; + case 2: _emitWord ((uint16_t)(sysuint_t)value); break; + case 4: _emitDWord((uint32_t)(sysuint_t)value); break; +#if defined(ASMJIT_X64) + case 8: _emitQWord((uint64_t)(sysuint_t)value); break; +#endif // ASMJIT_X64 + default: ASMJIT_ASSERT(0); + } + } + +end: + if (_logger +#if defined(ASMJIT_DEBUG) + || assertIllegal +#endif // ASMJIT_DEBUG + ) + { + char bufStorage[512]; + char* buf = bufStorage; + + // Detect truncated operand. + Imm immTemporary(0); + + // Use the original operands, because BYTE some of them were replaced. + if (bLoHiUsed) + { + o0 = _loggerOperands[0]; + o1 = _loggerOperands[1]; + o2 = _loggerOperands[2]; + } + + if (immOperand) + { + sysint_t value = immOperand->getValue(); + bool isUnsigned = immOperand->isUnsigned(); + + switch (immSize) + { + case 1: if ( isUnsigned && !Util::isUInt8 (value)) { immTemporary.setValue((uint8_t)(sysuint_t)value, true ); break; } + if (!isUnsigned && !Util::isInt8 (value)) { immTemporary.setValue((uint8_t)(sysuint_t)value, false); break; } + break; + case 2: if ( isUnsigned && !Util::isUInt16(value)) { immTemporary.setValue((uint16_t)(sysuint_t)value, true ); break; } + if (!isUnsigned && !Util::isInt16 (value)) { immTemporary.setValue((uint16_t)(sysuint_t)value, false); break; } + break; + case 4: if ( isUnsigned && !Util::isUInt32(value)) { immTemporary.setValue((uint32_t)(sysuint_t)value, true ); break; } + if (!isUnsigned && !Util::isInt32 (value)) { immTemporary.setValue((uint32_t)(sysuint_t)value, false); break; } + break; + } + + if (immTemporary.getValue() != 0) + { + if (o0 == immOperand) o0 = &immTemporary; + if (o1 == immOperand) o1 = &immTemporary; + if (o2 == immOperand) o2 = &immTemporary; + } + } + + buf = 
dumpInstruction(buf, code, _emitOptions, o0, o1, o2); + + if (_logger->getLogBinary()) + buf = dumpComment(buf, (sysuint_t)(buf - bufStorage), getCode() + beginOffset, getOffset() - beginOffset, _comment); + else + buf = dumpComment(buf, (sysuint_t)(buf - bufStorage), NULL, 0, _comment); + + // We don't need to NULL terminate the resulting string. +#if defined(ASMJIT_DEBUG) + if (_logger) +#endif // ASMJIT_DEBUG + _logger->logString(bufStorage, (sysuint_t)(buf - bufStorage)); + +#if defined(ASMJIT_DEBUG) + if (assertIllegal) + { + // Here we need to NULL terminate. + buf[0] = '\0'; + + // Raise an assertion failure, because this situation shouldn't happen. + assertionFailure(__FILE__, __LINE__, bufStorage); + } +#endif // ASMJIT_DEBUG + } + +cleanup: + _comment = NULL; + _emitOptions = 0; +} + +void AssemblerCore::_emitJcc(uint32_t code, const Label* label, uint32_t hint) ASMJIT_NOTHROW +{ + if (!hint) + { + _emitInstruction(code, label, NULL, NULL); + } + else + { + Imm imm(hint); + _emitInstruction(code, label, &imm, NULL); + } +} + +// ============================================================================ +// [AsmJit::AssemblerCore - Relocation helpers] +// ============================================================================ + +sysuint_t AssemblerCore::relocCode(void* _dst, sysuint_t addressBase) const ASMJIT_NOTHROW +{ + // Copy code to virtual memory (this is a given _dst pointer). + uint8_t* dst = reinterpret_cast<uint8_t*>(_dst); + + sysint_t coff = _buffer.getOffset(); + sysint_t csize = getCodeSize(); + + // We are copying the exact size of the generated code. Extra code for trampolines + // is generated on-the-fly by relocator (this code doesn't exist at the moment). + memcpy(dst, _buffer.getData(), coff); + +#if defined(ASMJIT_X64) + // Trampoline pointer. + uint8_t* tramp = dst + coff; +#endif // ASMJIT_X64 + + // Relocate all recorded locations. 
+ sysint_t i; + sysint_t len = _relocData.getLength(); + + for (i = 0; i < len; i++) + { + const RelocData& r = _relocData[i]; + sysint_t val; + +#if defined(ASMJIT_X64) + // Whether to use trampoline, can be only used if relocation type is + // ABSOLUTE_TO_RELATIVE_TRAMPOLINE. + bool useTrampoline = false; +#endif // ASMJIT_X64 + + // Be sure that reloc data structure is correct. + ASMJIT_ASSERT((sysint_t)(r.offset + r.size) <= csize); + + switch (r.type) + { + case RelocData::ABSOLUTE_TO_ABSOLUTE: + val = (sysint_t)(r.address); + break; + + case RelocData::RELATIVE_TO_ABSOLUTE: + val = (sysint_t)(addressBase + r.destination); + break; + + case RelocData::ABSOLUTE_TO_RELATIVE: + case RelocData::ABSOLUTE_TO_RELATIVE_TRAMPOLINE: + val = (sysint_t)( (sysuint_t)r.address - (addressBase + (sysuint_t)r.offset + 4) ); + +#if defined(ASMJIT_X64) + if (r.type == RelocData::ABSOLUTE_TO_RELATIVE_TRAMPOLINE && !Util::isInt32(val)) + { + val = (sysint_t)( (sysuint_t)tramp - ((sysuint_t)_dst + (sysuint_t)r.offset + 4) ); + useTrampoline = true; + } +#endif // ASMJIT_X64 + break; + + default: + ASMJIT_ASSERT(0); + } + + switch (r.size) + { + case 4: + *reinterpret_cast<int32_t*>(dst + r.offset) = (int32_t)val; + break; + + case 8: + *reinterpret_cast<int64_t*>(dst + r.offset) = (int64_t)val; + break; + + default: + ASMJIT_ASSERT(0); + } + +#if defined(ASMJIT_X64) + if (useTrampoline) + { + if (getLogger()) + { + getLogger()->logFormat("; Trampoline from %p -> %p\n", (int8_t*)addressBase + r.offset, r.address); + } + + TrampolineWriter::writeTrampoline(tramp, r.address); + tramp += TrampolineWriter::TRAMPOLINE_SIZE; + } +#endif // ASMJIT_X64 + } + +#if defined(ASMJIT_X64) + return (sysuint_t)(tramp - dst); +#else + return (sysuint_t)coff; +#endif // ASMJIT_X64 +} + +// ============================================================================ +// [AsmJit::AssemblerCore - Embed] +// ============================================================================ + +void 
AssemblerCore::embed(const void* data, sysuint_t size) ASMJIT_NOTHROW +{ + if (!canEmit()) return; + + if (_logger) + { + sysuint_t i, j; + sysuint_t max; + char buf[128]; + char dot[] = ".data "; + char* p; + + memcpy(buf, dot, ASMJIT_ARRAY_SIZE(dot) - 1); + + for (i = 0; i < size; i += 16) + { + max = (size - i < 16) ? size - i : 16; + p = buf + ASMJIT_ARRAY_SIZE(dot) - 1; + + for (j = 0; j < max; j++) + p += sprintf(p, "%0.2X", reinterpret_cast<const uint8_t *>(data)[i+j]); + + *p++ = '\n'; + *p = '\0'; + + _logger->logString(buf); + } + } + + _buffer.emitData(data, size); +} + +void AssemblerCore::embedLabel(const Label& label) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(label.getId() != INVALID_VALUE); + if (!canEmit()) return; + + LabelData& l_data = _labelData[label.getId() & OPERAND_ID_VALUE_MASK]; + RelocData r_data; + + if (_logger) + { + _logger->logFormat(sizeof(sysint_t) == 4 ? ".dd L.%u\n" : ".dq L.%u\n", (uint32_t)label.getId() & OPERAND_ID_VALUE_MASK); + } + + r_data.type = RelocData::RELATIVE_TO_ABSOLUTE; + r_data.size = sizeof(sysint_t); + r_data.offset = getOffset(); + r_data.destination = 0; + + if (l_data.offset != -1) + { + // Bound label. + r_data.destination = l_data.offset; + } + else + { + // Non-bound label. Need to chain. + LabelLink* link = _newLabelLink(); + + link->prev = (LabelLink*)l_data.links; + link->offset = getOffset(); + link->displacement = 0; + link->relocId = _relocData.getLength(); + + l_data.links = link; + } + + _relocData.append(r_data); + + // Emit dummy sysint (4 or 8 bytes that depends to address size). 
+ _emitSysInt(0); +} + +// ============================================================================ +// [AsmJit::AssemblerCore - Align] +// ============================================================================ + +void AssemblerCore::align(uint32_t m) ASMJIT_NOTHROW +{ + if (!canEmit()) return; + if (_logger) _logger->logFormat(".align %u", (uint)m); + + if (!m) return; + + if (m > 64) + { + ASMJIT_ASSERT(0); + return; + } + + sysint_t i = m - (getOffset() % m); + if (i == m) return; + + if (_properties & (1 << PROPERTY_OPTIMIZE_ALIGN)) + { + const CpuInfo* ci = getCpuInfo(); + + // NOPs optimized for Intel: + // Intel 64 and IA-32 Architectures Software Developer's Manual + // - Volume 2B + // - Instruction Set Reference N-Z + // - NOP + + // NOPs optimized for AMD: + // Software Optimization Guide for AMD Family 10h Processors (Quad-Core) + // - 4.13 - Code Padding with Operand-Size Override and Multibyte NOP + + // Intel and AMD. + static const uint8_t nop1[] = { 0x90 }; + static const uint8_t nop2[] = { 0x66, 0x90 }; + static const uint8_t nop3[] = { 0x0F, 0x1F, 0x00 }; + static const uint8_t nop4[] = { 0x0F, 0x1F, 0x40, 0x00 }; + static const uint8_t nop5[] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 }; + static const uint8_t nop6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 }; + static const uint8_t nop7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 }; + static const uint8_t nop8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static const uint8_t nop9[] = { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; + + // AMD. 
+ static const uint8_t nop10[] = { 0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static const uint8_t nop11[] = { 0x66, 0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }; + + const uint8_t* p; + sysint_t n; + + if (ci->vendorId == CPU_VENDOR_INTEL && + ((ci->family & 0x0F) == 6 || + (ci->family & 0x0F) == 15) + ) + { + do { + switch (i) + { + case 1: p = nop1; n = 1; break; + case 2: p = nop2; n = 2; break; + case 3: p = nop3; n = 3; break; + case 4: p = nop4; n = 4; break; + case 5: p = nop5; n = 5; break; + case 6: p = nop6; n = 6; break; + case 7: p = nop7; n = 7; break; + case 8: p = nop8; n = 8; break; + default: p = nop9; n = 9; break; + } + + i -= n; + do { _emitByte(*p++); } while(--n); + } while (i); + + return; + } + + if (ci->vendorId == CPU_VENDOR_AMD && + ci->family >= 0x0F) + { + do { + switch (i) + { + case 1: p = nop1 ; n = 1; break; + case 2: p = nop2 ; n = 2; break; + case 3: p = nop3 ; n = 3; break; + case 4: p = nop4 ; n = 4; break; + case 5: p = nop5 ; n = 5; break; + case 6: p = nop6 ; n = 6; break; + case 7: p = nop7 ; n = 7; break; + case 8: p = nop8 ; n = 8; break; + case 9: p = nop9 ; n = 9; break; + case 10: p = nop10; n = 10; break; + default: p = nop11; n = 11; break; + } + + i -= n; + do { _emitByte(*p++); } while(--n); + } while (i); + + return; + } +#if defined(ASMJIT_X86) + // legacy NOPs, 0x90 with 0x66 prefix. 
+ do { + switch (i) + { + default: _emitByte(0x66); i--; + case 3: _emitByte(0x66); i--; + case 2: _emitByte(0x66); i--; + case 1: _emitByte(0x90); i--; + } + } while(i); +#endif + } + + // legacy NOPs, only 0x90 + // In 64-bit mode, we can't use 0x66 prefix + do { + _emitByte(0x90); + } while(--i); +} + +// ============================================================================ +// [AsmJit::AssemblerCore - Label] +// ============================================================================ + +Label AssemblerCore::newLabel() ASMJIT_NOTHROW +{ + Label label; + label._base.id = (uint32_t)_labelData.getLength() | OPERAND_ID_TYPE_LABEL; + + LabelData l_data; + l_data.offset = -1; + l_data.links = NULL; + _labelData.append(l_data); + + return label; +} + +void AssemblerCore::registerLabels(sysuint_t count) ASMJIT_NOTHROW +{ + // Duplicated newLabel() code, but we are not creating Label instances. + LabelData l_data; + l_data.offset = -1; + l_data.links = NULL; + + for (sysuint_t i = 0; i < count; i++) _labelData.append(l_data); +} + +void AssemblerCore::bind(const Label& label) ASMJIT_NOTHROW +{ + // Only labels created by newLabel() can be used by Assembler. + ASMJIT_ASSERT(label.getId() != INVALID_VALUE); + // Never go out of bounds. + ASMJIT_ASSERT((label.getId() & OPERAND_ID_VALUE_MASK) < _labelData.getLength()); + + // Get label data based on label id. + LabelData& l_data = _labelData[label.getId() & OPERAND_ID_VALUE_MASK]; + + // Label can be bound only once. + ASMJIT_ASSERT(l_data.offset == -1); + + // Log. + if (_logger) _logger->logFormat("L.%u:\n", (uint32_t)label.getId() & OPERAND_ID_VALUE_MASK); + + sysint_t pos = getOffset(); + + LabelLink* link = l_data.links; + LabelLink* prev = NULL; + + while (link) + { + sysint_t offset = link->offset; + + if (link->relocId != -1) + { + // If linked label points to RelocData then instead of writing relative + // displacement to assembler stream, we will write it to RelocData. 
+ _relocData[link->relocId].destination += pos; + } + else + { + // Not using relocId, this means that we overwriting real displacement + // in assembler stream. + int32_t patchedValue = (int32_t)(pos - offset + link->displacement); + uint32_t size = getByteAt(offset); + + // Only these size specifiers are allowed. + ASMJIT_ASSERT(size == 1 || size == 4); + + if (size == 4) + { + setInt32At(offset, patchedValue); + } + else // if (size == 1) + { + if (Util::isInt8(patchedValue)) + { + setByteAt(offset, (uint8_t)(int8_t)patchedValue); + } + else + { + // Fatal error. + setError(ERROR_ILLEGAL_SHORT_JUMP); + } + } + } + + prev = link->prev; + link = prev; + } + + // Chain unused links. + link = l_data.links; + if (link) + { + if (prev == NULL) prev = link; + + prev->prev = _unusedLinks; + _unusedLinks = link; + } + + // Unlink label if it was linked. + l_data.offset = pos; + l_data.links = NULL; +} + +// ============================================================================ +// [AsmJit::AssemblerCore - Make] +// ============================================================================ + +void* AssemblerCore::make() ASMJIT_NOTHROW +{ + // Do nothing on error state or when no instruction was emitted. 
+ if (_error || getCodeSize() == 0) return NULL; + + void* p; + _error = _codeGenerator->generate(&p, reinterpret_cast<Assembler*>(this)); + return p; +} + +// ============================================================================ +// [AsmJit::AssemblerCore - Links] +// ============================================================================ + +AssemblerCore::LabelLink* AssemblerCore::_newLabelLink() ASMJIT_NOTHROW +{ + LabelLink* link = _unusedLinks; + + if (link) + { + _unusedLinks = link->prev; + } + else + { + link = (LabelLink*)_zone.zalloc(sizeof(LabelLink)); + if (link == NULL) return NULL; + } + + // clean link + link->prev = NULL; + link->offset = 0; + link->displacement = 0; + link->relocId = -1; + + return link; +} + +// ============================================================================ +// [AsmJit::Assembler - Construction / Destruction] +// ============================================================================ + +Assembler::Assembler(CodeGenerator* codeGenerator) ASMJIT_NOTHROW : + AssemblerIntrinsics(codeGenerator) +{ +} + +Assembler::~Assembler() ASMJIT_NOTHROW +{ +} + +} // AsmJit namespace + +// [Api-End] +#include "ApiEnd.h" diff --git a/lib/AsmJit/AssemblerX86X64.h b/lib/AsmJit/AssemblerX86X64.h new file mode 100644 index 0000000..b660336 --- /dev/null +++ b/lib/AsmJit/AssemblerX86X64.h @@ -0,0 +1,7790 @@ +// AsmJit - Complete JIT Assembler for C++ Language. 
+ +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Guard] +#ifndef _ASMJIT_ASSEMBLERX86X64_H +#define _ASMJIT_ASSEMBLERX86X64_H + +#if !defined(_ASMJIT_ASSEMBLER_H) +#warning "AsmJit/AssemblerX86X64.h can be only included by AsmJit/Assembler.h" +#endif // _ASMJIT_ASSEMBLER_H + +// [Dependencies] +#include "Build.h" +#include "Defs.h" +#include "Operand.h" +#include "Util.h" + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +//! @addtogroup AsmJit_Core +//! 
@{ + +// ============================================================================ +// [Forward Declarations] +// ============================================================================ + +struct CodeGenerator; + +// ============================================================================ +// [AsmJit::AssemblerCore] +// ============================================================================ + +//! @brief AssemblerCore is part of @c Assembler class. +//! +//! @c AssemblerCore class implements part of assembler serializing API. The +//! reason why @c Assembler class was split is that we want to hide exported +//! symbols in dynamically linked libraries. +//! +//! Please always use @c Assembler class instead. +//! +//! @sa @c Assembler. +struct ASMJIT_API AssemblerCore +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Creates Assembler instance. + AssemblerCore(CodeGenerator* codeGenerator) ASMJIT_NOTHROW; + //! @brief Destroys Assembler instance + virtual ~AssemblerCore() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [LabelLink] + // -------------------------------------------------------------------------- + + //! @brief Data structure used to link linked-labels. + struct LabelLink + { + //! @brief Previous link. + LabelLink* prev; + //! @brief Offset. + sysint_t offset; + //! @brief Inlined displacement. + sysint_t displacement; + //! @brief RelocId if link must be absolute when relocated. + sysint_t relocId; + }; + + // -------------------------------------------------------------------------- + // [LabelData] + // -------------------------------------------------------------------------- + + //! @brief Label data. + struct LabelData + { + //! @brief Label offset. + sysint_t offset; + //! @brief Label links chain. 
+ LabelLink* links; + }; + + // -------------------------------------------------------------------------- + // [RelocData] + // -------------------------------------------------------------------------- + + // X86 architecture uses 32-bit absolute addressing model by memory operands, + // but 64-bit mode uses relative addressing model (RIP + displacement). In + // code we are always using relative addressing model for referencing labels + // and embedded data. In 32-bit mode we must patch all references to absolute + // address before we can call generated function. We are patching only memory + // operands. + + //! @brief Code relocation data (relative vs absolute addresses). + struct RelocData + { + enum Type + { + ABSOLUTE_TO_ABSOLUTE = 0, + RELATIVE_TO_ABSOLUTE = 1, + ABSOLUTE_TO_RELATIVE = 2, + ABSOLUTE_TO_RELATIVE_TRAMPOLINE = 3 + }; + + //! @brief Type of relocation. + uint32_t type; + + //! @brief Size of relocation (4 or 8 bytes). + uint32_t size; + + //! @brief Offset from code begin address. + sysint_t offset; + + //! @brief Relative displacement or absolute address. + union + { + //! @brief Relative displacement from code begin address (not to @c offset). + sysint_t destination; + //! @brief Absolute address where to jump; + void* address; + }; + }; + + // -------------------------------------------------------------------------- + // [Code Generator] + // -------------------------------------------------------------------------- + + //! @brief Get code generator. + inline CodeGenerator* getCodeGenerator() const { return _codeGenerator; } + + // -------------------------------------------------------------------------- + // [Memory Management] + // -------------------------------------------------------------------------- + + //! @brief Get zone memory manager. 
+ inline Zone& getZone() ASMJIT_NOTHROW { return _zone; } + + // -------------------------------------------------------------------------- + // [Logging] + // -------------------------------------------------------------------------- + + //! @brief Get logger. + inline Logger* getLogger() const ASMJIT_NOTHROW { return _logger; } + + //! @brief Set logger to @a logger. + virtual void setLogger(Logger* logger) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Error Handling] + // -------------------------------------------------------------------------- + + //! @brief Get error code. + inline uint32_t getError() const ASMJIT_NOTHROW { return _error; } + + //! @brief Set error code. + //! + //! This method is virtual, because higher classes can use it to catch all + //! errors. + virtual void setError(uint32_t error) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Properties] + // -------------------------------------------------------------------------- + + //! @brief Get assembler property. + uint32_t getProperty(uint32_t propertyId); + //! @brief Set assembler property. + void setProperty(uint32_t propertyId, uint32_t value); + + // -------------------------------------------------------------------------- + // [Buffer Getters / Setters] + // -------------------------------------------------------------------------- + + //! @brief Return start of assembler code buffer. + //! + //! Note that buffer address can change if you emit instruction or something + //! else. Use this pointer only when you finished or make sure you do not + //! use returned pointer after emitting. + inline uint8_t* getCode() const ASMJIT_NOTHROW + { return _buffer.getData(); } + + //! @brief Ensure space for next instruction. + //! + //! Note that this method can return false. It's rare and probably you never + //! get this, but in some situations it's still possible. 
+ inline bool ensureSpace() ASMJIT_NOTHROW + { return _buffer.ensureSpace(); } + + //! @brief Return current offset in buffer). + inline sysint_t getOffset() const ASMJIT_NOTHROW + { return _buffer.getOffset(); } + + //! @brief Return current offset in buffer (same as getCffset() + getTramplineSize()). + inline sysint_t getCodeSize() const ASMJIT_NOTHROW + { return _buffer.getOffset() + getTrampolineSize(); } + + //! @brief Get size of all possible trampolines needed to successfuly generate + //! relative jumps to absolute addresses. This value is only non-zero if jmp + //! of call instructions were used with immediate operand (this means jump or + //! call absolute address directly). + //! + //! Currently only _emitJmpOrCallReloc() method can increase trampoline size + //! value. + inline sysint_t getTrampolineSize() const ASMJIT_NOTHROW + { return _trampolineSize; } + + //! @brief Set offset to @a o and returns previous offset. + //! + //! This method can be used to truncate code (previous offset is not + //! recorded) or to overwrite instruction stream at position @a o. + //! + //! @return Previous offset value that can be uset to set offset back later. + inline sysint_t toOffset(sysint_t o) ASMJIT_NOTHROW + { return _buffer.toOffset(o); } + + //! @brief Get capacity of internal code buffer. + inline sysint_t getCapacity() const ASMJIT_NOTHROW + { return _buffer.getCapacity(); } + + //! @brief Clear everything, but not deallocate buffers. + void clear() ASMJIT_NOTHROW; + + //! @brief Free internal buffer and NULL all pointers. + void free() ASMJIT_NOTHROW; + + //! @brief Take internal code buffer and NULL all pointers (you take the ownership). + uint8_t* takeCode() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Stream Setters / Getters] + // -------------------------------------------------------------------------- + + //! @brief Set byte at position @a pos. 
+ inline uint8_t getByteAt(sysint_t pos) const ASMJIT_NOTHROW + { return _buffer.getByteAt(pos); } + + //! @brief Set word at position @a pos. + inline uint16_t getWordAt(sysint_t pos) const ASMJIT_NOTHROW + { return _buffer.getWordAt(pos); } + + //! @brief Set word at position @a pos. + inline uint32_t getDWordAt(sysint_t pos) const ASMJIT_NOTHROW + { return _buffer.getDWordAt(pos); } + + //! @brief Set word at position @a pos. + inline uint64_t getQWordAt(sysint_t pos) const ASMJIT_NOTHROW + { return _buffer.getQWordAt(pos); } + + //! @brief Set byte at position @a pos. + inline void setByteAt(sysint_t pos, uint8_t x) ASMJIT_NOTHROW + { _buffer.setByteAt(pos, x); } + + //! @brief Set word at position @a pos. + inline void setWordAt(sysint_t pos, uint16_t x) ASMJIT_NOTHROW + { _buffer.setWordAt(pos, x); } + + //! @brief Set word at position @a pos. + inline void setDWordAt(sysint_t pos, uint32_t x) ASMJIT_NOTHROW + { _buffer.setDWordAt(pos, x); } + + //! @brief Set word at position @a pos. + inline void setQWordAt(sysint_t pos, uint64_t x) ASMJIT_NOTHROW + { _buffer.setQWordAt(pos, x); } + + //! @brief Set word at position @a pos. + inline int32_t getInt32At(sysint_t pos) const ASMJIT_NOTHROW + { return (int32_t)_buffer.getDWordAt(pos); } + + //! @brief Set int32 at position @a pos. + inline void setInt32At(sysint_t pos, int32_t x) ASMJIT_NOTHROW + { _buffer.setDWordAt(pos, (int32_t)x); } + + //! @brief Set custom variable @a imm at position @a pos. + //! + //! @note This function is used to patch existing code. + void setVarAt(sysint_t pos, sysint_t i, uint8_t isUnsigned, uint32_t size) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Assembler Emitters] + // + // These emitters are not protecting buffer from overrun, this must be + // done is emitX86() methods by: + // if (!canEmit()) return; + // -------------------------------------------------------------------------- + + //! 
@brief Get whether next instruction can be emitted. + //! + //! This function behaves like @c ensureSpace(), but it also checks if + //! assembler is in error state and in that case it returns @c false. + //! Assembler internally always uses this function before new instruction is + //! emitted. + //! + //! It's implemented like: + //! <code>return ensureSpace() && !getError();</code> + bool canEmit() ASMJIT_NOTHROW; + + //! @brief Emit Byte to internal buffer. + inline void _emitByte(uint8_t x) ASMJIT_NOTHROW + { _buffer.emitByte(x); } + + //! @brief Emit Word (2 bytes) to internal buffer. + inline void _emitWord(uint16_t x) ASMJIT_NOTHROW + { _buffer.emitWord(x); } + + //! @brief Emit DWord (4 bytes) to internal buffer. + inline void _emitDWord(uint32_t x) ASMJIT_NOTHROW + { _buffer.emitDWord(x); } + + //! @brief Emit QWord (8 bytes) to internal buffer. + inline void _emitQWord(uint64_t x) ASMJIT_NOTHROW + { _buffer.emitQWord(x); } + + //! @brief Emit Int32 (4 bytes) to internal buffer. + inline void _emitInt32(int32_t x) ASMJIT_NOTHROW + { _buffer.emitDWord((uint32_t)x); } + + //! @brief Emit system signed integer (4 or 8 bytes) to internal buffer. + inline void _emitSysInt(sysint_t x) ASMJIT_NOTHROW + { _buffer.emitSysInt(x); } + + //! @brief Emit system unsigned integer (4 or 8 bytes) to internal buffer. + inline void _emitSysUInt(sysuint_t x) ASMJIT_NOTHROW + { _buffer.emitSysUInt(x); } + + //! @brief Emit single @a opCode without operands. + inline void _emitOpCode(uint32_t opCode) ASMJIT_NOTHROW + { + // instruction prefix + if (opCode & 0xFF000000) _emitByte((uint8_t)((opCode & 0xFF000000) >> 24)); + // instruction opcodes + if (opCode & 0x00FF0000) _emitByte((uint8_t)((opCode & 0x00FF0000) >> 16)); + if (opCode & 0x0000FF00) _emitByte((uint8_t)((opCode & 0x0000FF00) >> 8)); + // last opcode is always emitted (can be also 0x00) + _emitByte((uint8_t)(opCode & 0x000000FF)); + } + + //! @brief Emit CS (code segmend) prefix. + //! + //! 
Behavior of this function is to emit code prefix only if memory operand + //! address uses code segment. Code segment is used through memory operand + //! with attached @c AsmJit::Label. + void _emitSegmentPrefix(const Operand& rm) ASMJIT_NOTHROW; + + //! @brief Emit MODR/M byte. + inline void _emitMod(uint8_t m, uint8_t o, uint8_t r) ASMJIT_NOTHROW + { _emitByte(((m & 0x03) << 6) | ((o & 0x07) << 3) | (r & 0x07)); } + + //! @brief Emit SIB byte. + inline void _emitSib(uint8_t s, uint8_t i, uint8_t b) ASMJIT_NOTHROW + { _emitByte(((s & 0x03) << 6) | ((i & 0x07) << 3) | (b & 0x07)); } + + //! @brief Emit REX prefix (64-bit mode only). + inline void _emitRexR(uint8_t w, uint8_t opReg, uint8_t regCode, bool forceRexPrefix) ASMJIT_NOTHROW + { +#if defined(ASMJIT_X64) + uint8_t r = (opReg & 0x8) != 0; + uint8_t b = (regCode & 0x8) != 0; + + // w Default operand size(0=Default, 1=64-bit). + // r Register field (1=high bit extension of the ModR/M REG field). + // x Index field not used in RexR + // b Base field (1=high bit extension of the ModR/M or SIB Base field). + if (w || r || b || forceRexPrefix) + { + _emitByte(0x40 | (w << 3) | (r << 2) | b); + } +#else + ASMJIT_UNUSED(w); + ASMJIT_UNUSED(opReg); + ASMJIT_UNUSED(regCode); + ASMJIT_UNUSED(forceRexPrefix); +#endif // ASMJIT_X64 + } + + //! @brief Emit REX prefix (64-bit mode only). + inline void _emitRexRM(uint8_t w, uint8_t opReg, const Operand& rm, bool forceRexPrefix) ASMJIT_NOTHROW + { +#if defined(ASMJIT_X64) + uint8_t r = (opReg & 0x8) != 0; + uint8_t x = 0; + uint8_t b = 0; + + if (rm.isReg()) + { + b = (reinterpret_cast<const BaseReg&>(rm).getRegCode() & 0x8) != 0; + } + else if (rm.isMem()) + { + x = ((reinterpret_cast<const Mem&>(rm).getIndex() & 0x8) != 0) & (reinterpret_cast<const Mem&>(rm).getIndex() != INVALID_VALUE); + b = ((reinterpret_cast<const Mem&>(rm).getBase() & 0x8) != 0) & (reinterpret_cast<const Mem&>(rm).getBase() != INVALID_VALUE); + } + + // w Default operand size(0=Default, 1=64-bit). 
+ // r Register field (1=high bit extension of the ModR/M REG field). + // x Index field (1=high bit extension of the SIB Index field). + // b Base field (1=high bit extension of the ModR/M or SIB Base field). + if (w || r || x || b || forceRexPrefix) + { + _emitByte(0x40 | (w << 3) | (r << 2) | (x << 1) | b); + } +#else + ASMJIT_UNUSED(w); + ASMJIT_UNUSED(opReg); + ASMJIT_UNUSED(rm); +#endif // ASMJIT_X64 + } + + //! @brief Emit Register / Register - calls _emitMod(3, opReg, r) + inline void _emitModR(uint8_t opReg, uint8_t r) ASMJIT_NOTHROW + { _emitMod(3, opReg, r); } + + //! @brief Emit Register / Register - calls _emitMod(3, opReg, r.code()) + inline void _emitModR(uint8_t opReg, const BaseReg& r) ASMJIT_NOTHROW + { _emitMod(3, opReg, r.getRegCode()); } + + //! @brief Emit register / memory address combination to buffer. + //! + //! This method can hangle addresses from simple to complex ones with + //! index and displacement. + void _emitModM(uint8_t opReg, const Mem& mem, sysint_t immSize) ASMJIT_NOTHROW; + + //! @brief Emit Reg<-Reg or Reg<-Reg|Mem ModRM (can be followed by SIB + //! and displacement) to buffer. + //! + //! This function internally calls @c _emitModM() or _emitModR() that depends + //! to @a op type. + //! + //! @note @a opReg is usually real register ID (see @c R) but some instructions + //! have specific format and in that cases @a opReg is part of opcode. + void _emitModRM(uint8_t opReg, const Operand& op, sysint_t immSize) ASMJIT_NOTHROW; + + //! @brief Emit instruction where register is inlined to opcode. + void _emitX86Inl(uint32_t opCode, uint8_t i16bit, uint8_t rexw, uint8_t reg, bool forceRexPrefix) ASMJIT_NOTHROW; + + //! @brief Emit instruction with reg/memory operand. + void _emitX86RM(uint32_t opCode, uint8_t i16bit, uint8_t rexw, uint8_t o, + const Operand& op, sysint_t immSize, bool forceRexPrefix) ASMJIT_NOTHROW; + + //! @brief Emit FPU instruction with no operands. + void _emitFpu(uint32_t opCode) ASMJIT_NOTHROW; + + //! 
@brief Emit FPU instruction with one operand @a sti (index of FPU register). + void _emitFpuSTI(uint32_t opCode, uint32_t sti) ASMJIT_NOTHROW; + + //! @brief Emit FPU instruction with one operand @a opReg and memory operand @a mem. + void _emitFpuMEM(uint32_t opCode, uint8_t opReg, const Mem& mem) ASMJIT_NOTHROW; + + //! @brief Emit MMX/SSE instruction. + void _emitMmu(uint32_t opCode, uint8_t rexw, uint8_t opReg, const Operand& src, + sysint_t immSize) ASMJIT_NOTHROW; + + //! @brief Emit displacement. + LabelLink* _emitDisplacement(LabelData& l_data, sysint_t inlinedDisplacement, int size) ASMJIT_NOTHROW; + + //! @brief Emit relative relocation to absolute pointer @a target. It's needed + //! to add what instruction is emitting this, because in x64 mode the relative + //! displacement can be impossible to calculate and in this case the trampoline + //! is used. + void _emitJmpOrCallReloc(uint32_t instruction, void* target) ASMJIT_NOTHROW; + + // Helpers to decrease binary code size. These four emit methods are just + // helpers thats used by assembler. They call emitX86() adding NULLs + // to first, second and third operand, if needed. + + //! @brief Emit X86/FPU or MM/XMM instruction. + void _emitInstruction(uint32_t code) ASMJIT_NOTHROW; + + //! @brief Emit X86/FPU or MM/XMM instruction. + void _emitInstruction(uint32_t code, const Operand* o0) ASMJIT_NOTHROW; + + //! @brief Emit X86/FPU or MM/XMM instruction. + void _emitInstruction(uint32_t code, const Operand* o0, const Operand* o1) ASMJIT_NOTHROW; + + //! @brief Emit X86/FPU or MM/XMM instruction. + //! + //! Operands @a o1, @a o2 or @a o3 can be @c NULL if they are not used. + //! + //! Hint: Use @c emitX86() helpers to emit instructions. + void _emitInstruction(uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2) ASMJIT_NOTHROW; + + //! @brief Private method for emitting jcc. + void _emitJcc(uint32_t code, const Label* label, uint32_t hint) ASMJIT_NOTHROW; + + //! 
@brief Private method for emitting short jcc. + inline void _emitShortJcc(uint32_t code, const Label* label, uint32_t hint) + { + _emitOptions |= EMIT_OPTION_SHORT_JUMP; + _emitJcc(code, label, hint); + } + + // -------------------------------------------------------------------------- + // [Relocation helpers] + // -------------------------------------------------------------------------- + + //! @brief Relocate code to a given address @a dst. + //! + //! @param dst Where the relocated code should me stored. The pointer can be + //! address returned by virtual memory allocator or your own address if you + //! want only to store the code for later reuse (or load, etc...). + //! @param addressBase Base address used for relocation. When using JIT code + //! generation, this will be the same as @a dst, only casted to system + //! integer type. But when generating code for remote process then the value + //! can be different. + //! + //! @retval The bytes used. Code-generator can create trampolines which are + //! used when calling other functions inside the JIT code. However, these + //! trampolines can be unused so the relocCode() returns the exact size needed + //! for the function. + //! + //! A given buffer will be overwritten, to get number of bytes required use + //! @c getCodeSize() . + virtual sysuint_t relocCode(void* dst, sysuint_t addressBase) const ASMJIT_NOTHROW; + + //! @brief Simplifed version of @c relocCode() method. + inline sysuint_t relocCode(void* dst) const ASMJIT_NOTHROW + { + return relocCode(dst, (sysuint_t)dst); + } + + // -------------------------------------------------------------------------- + // [Embed] + // -------------------------------------------------------------------------- + + //! @brief Embed data into instruction stream. + void embed(const void* data, sysuint_t length) ASMJIT_NOTHROW; + //! @brief Embed absolute label pointer (4 or 8 bytes). 
+ void embedLabel(const Label& label) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Align] + // -------------------------------------------------------------------------- + + //! @brief Align target buffer to @a m bytes. + //! + //! Typical usage of this is to align labels at start of the inner loops. + //! + //! Inserts @c nop() instructions or CPU optimized NOPs. + void align(uint32_t m) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Label] + // -------------------------------------------------------------------------- + + //! @brief Create and return new label. + Label newLabel() ASMJIT_NOTHROW; + + //! @brief Register labels (used by @c Compiler). + void registerLabels(sysuint_t count) ASMJIT_NOTHROW; + + //! @brief Bind label to the current offset. + //! + //! @note Label can be bound only once! + void bind(const Label& label) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Make] + // -------------------------------------------------------------------------- + + //! @brief Make is convenience method to make currently serialized code and + //! return pointer to generated function. + //! + //! What you need is only to cast this pointer to your function type and call + //! it. Note that if there was an error and calling @c getError() method not + //! returns @c ERROR_NONE (zero) then this function always return @c NULL and + //! error value remains the same. 
+ virtual void* make() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Links] + // -------------------------------------------------------------------------- + + LabelLink* _newLabelLink() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +protected: + + //! @brief Code generator instance. + CodeGenerator* _codeGenerator; + + //! @brief Zone memory management. + Zone _zone; + + //! @brief Logger. + Logger* _logger; + + //! @brief Last error code. + uint32_t _error; + + //! @brief Properties. + uint32_t _properties; + + //! @brief Emit flags for next instruction (cleared after emit). + uint32_t _emitOptions; + + //! @brief Binary code buffer. + Buffer _buffer; + + //! @brief Size of possible trampolines. + sysint_t _trampolineSize; + + //! @brief Linked list of unused links (@c LabelLink* structures) + LabelLink* _unusedLinks; + + //! @brief Labels data. + PodVector<LabelData> _labelData; + + //! @brief Relocations data. + PodVector<RelocData> _relocData; + + //! @brief Comment that will be logger by next emitted instruction. After + //! instruction is logger the _comment is set to NULL. + const char* _comment; + + friend struct CompilerCore; + friend struct EInstruction; +}; + +// ============================================================================ +// [AsmJit::AssemblerIntrinsics] +// ============================================================================ + +//! @brief AssemblerIntrinsics is part of @c Assembler class. +//! +//! @c AssemblerIntrinsics class implements part of assembler serializing API. +//! The reason why @c Assembler class was split is that we want to hide exported +//! symbols in dynamically linked libraries. +//! +//! Please always use @c Assembler class instead. +//! +//! @sa @c Assembler. 
+struct ASMJIT_HIDDEN AssemblerIntrinsics : public AssemblerCore +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + inline AssemblerIntrinsics(CodeGenerator* codeGenerator) ASMJIT_NOTHROW : + AssemblerCore(codeGenerator) + { + } + + // -------------------------------------------------------------------------- + // [Embed] + // -------------------------------------------------------------------------- + + //! @brief Add 8-bit integer data to the instuction stream. + inline void db(uint8_t x) ASMJIT_NOTHROW { embed(&x, 1); } + //! @brief Add 16-bit integer data to the instuction stream. + inline void dw(uint16_t x) ASMJIT_NOTHROW { embed(&x, 2); } + //! @brief Add 32-bit integer data to the instuction stream. + inline void dd(uint32_t x) ASMJIT_NOTHROW { embed(&x, 4); } + //! @brief Add 64-bit integer data to the instuction stream. + inline void dq(uint64_t x) ASMJIT_NOTHROW { embed(&x, 8); } + + //! @brief Add 8-bit integer data to the instuction stream. + inline void dint8(int8_t x) ASMJIT_NOTHROW { embed(&x, sizeof(int8_t)); } + //! @brief Add 8-bit integer data to the instuction stream. + inline void duint8(uint8_t x) ASMJIT_NOTHROW { embed(&x, sizeof(uint8_t)); } + + //! @brief Add 16-bit integer data to the instuction stream. + inline void dint16(int16_t x) ASMJIT_NOTHROW { embed(&x, sizeof(int16_t)); } + //! @brief Add 16-bit integer data to the instuction stream. + inline void duint16(uint16_t x) ASMJIT_NOTHROW { embed(&x, sizeof(uint16_t)); } + + //! @brief Add 32-bit integer data to the instuction stream. + inline void dint32(int32_t x) ASMJIT_NOTHROW { embed(&x, sizeof(int32_t)); } + //! @brief Add 32-bit integer data to the instuction stream. + inline void duint32(uint32_t x) ASMJIT_NOTHROW { embed(&x, sizeof(uint32_t)); } + + //! @brief Add 64-bit integer data to the instuction stream. 
+ inline void dint64(int64_t x) ASMJIT_NOTHROW { embed(&x, sizeof(int64_t)); } + //! @brief Add 64-bit integer data to the instuction stream. + inline void duint64(uint64_t x) ASMJIT_NOTHROW { embed(&x, sizeof(uint64_t)); } + + //! @brief Add system-integer data to the instuction stream. + inline void dsysint(sysint_t x) ASMJIT_NOTHROW { embed(&x, sizeof(sysint_t)); } + //! @brief Add system-integer data to the instuction stream. + inline void dsysuint(sysuint_t x) ASMJIT_NOTHROW { embed(&x, sizeof(sysuint_t)); } + + //! @brief Add float data to the instuction stream. + inline void dfloat(float x) ASMJIT_NOTHROW { embed(&x, sizeof(float)); } + //! @brief Add double data to the instuction stream. + inline void ddouble(double x) ASMJIT_NOTHROW { embed(&x, sizeof(double)); } + + //! @brief Add pointer data to the instuction stream. + inline void dptr(void* x) ASMJIT_NOTHROW { embed(&x, sizeof(void*)); } + + //! @brief Add MM data to the instuction stream. + inline void dmm(const MMData& x) ASMJIT_NOTHROW { embed(&x, sizeof(MMData)); } + //! @brief Add XMM data to the instuction stream. + inline void dxmm(const XMMData& x) ASMJIT_NOTHROW { embed(&x, sizeof(XMMData)); } + + //! @brief Add data to the instuction stream. + inline void data(const void* data, sysuint_t size) ASMJIT_NOTHROW { embed(data, size); } + + //! @brief Add data in a given structure instance to the instuction stream. + template<typename T> + inline void dstruct(const T& x) ASMJIT_NOTHROW { embed(&x, sizeof(T)); } + + // -------------------------------------------------------------------------- + // [X86 Instructions] + // -------------------------------------------------------------------------- + + //! @brief Add with Carry. + inline void adc(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_ADC, &dst, &src); + } + //! @brief Add with Carry. + inline void adc(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_ADC, &dst, &src); + } + //! @brief Add with Carry. 
+ inline void adc(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_ADC, &dst, &src); + } + //! @brief Add with Carry. + inline void adc(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_ADC, &dst, &src); + } + //! @brief Add with Carry. + inline void adc(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_ADC, &dst, &src); + } + + //! @brief Add. + inline void add(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_ADD, &dst, &src); + } + //! @brief Add. + inline void add(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_ADD, &dst, &src); + } + //! @brief Add. + inline void add(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_ADD, &dst, &src); + } + //! @brief Add. + inline void add(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_ADD, &dst, &src); + } + //! @brief Add. + inline void add(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_ADD, &dst, &src); + } + + //! @brief Logical And. + inline void and_(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_AND, &dst, &src); + } + //! @brief Logical And. + inline void and_(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_AND, &dst, &src); + } + //! @brief Logical And. + inline void and_(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_AND, &dst, &src); + } + //! @brief Logical And. + inline void and_(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_AND, &dst, &src); + } + //! @brief Logical And. + inline void and_(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_AND, &dst, &src); + } + + //! @brief Bit Scan Forward. + inline void bsf(const GPReg& dst, const GPReg& src) + { + ASMJIT_ASSERT(!dst.isGPB()); + _emitInstruction(INST_BSF, &dst, &src); + } + //! @brief Bit Scan Forward. + inline void bsf(const GPReg& dst, const Mem& src) + { + ASMJIT_ASSERT(!dst.isGPB()); + _emitInstruction(INST_BSF, &dst, &src); + } + + //! @brief Bit Scan Reverse. 
+ inline void bsr(const GPReg& dst, const GPReg& src) + { + ASMJIT_ASSERT(!dst.isGPB()); + _emitInstruction(INST_BSR, &dst, &src); + } + //! @brief Bit Scan Reverse. + inline void bsr(const GPReg& dst, const Mem& src) + { + ASMJIT_ASSERT(!dst.isGPB()); + _emitInstruction(INST_BSR, &dst, &src); + } + + //! @brief Byte swap (32-bit or 64-bit registers only) (i486). + inline void bswap(const GPReg& dst) + { + ASMJIT_ASSERT(dst.getRegType() == REG_TYPE_GPD || dst.getRegType() == REG_TYPE_GPQ); + _emitInstruction(INST_BSWAP, &dst); + } + + //! @brief Bit test. + inline void bt(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_BT, &dst, &src); + } + //! @brief Bit test. + inline void bt(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_BT, &dst, &src); + } + //! @brief Bit test. + inline void bt(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_BT, &dst, &src); + } + //! @brief Bit test. + inline void bt(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_BT, &dst, &src); + } + + //! @brief Bit test and complement. + inline void btc(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_BTC, &dst, &src); + } + //! @brief Bit test and complement. + inline void btc(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_BTC, &dst, &src); + } + //! @brief Bit test and complement. + inline void btc(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_BTC, &dst, &src); + } + //! @brief Bit test and complement. + inline void btc(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_BTC, &dst, &src); + } + + //! @brief Bit test and reset. + inline void btr(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_BTR, &dst, &src); + } + //! @brief Bit test and reset. + inline void btr(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_BTR, &dst, &src); + } + //! @brief Bit test and reset. 
+ inline void btr(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_BTR, &dst, &src); + } + //! @brief Bit test and reset. + inline void btr(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_BTR, &dst, &src); + } + + //! @brief Bit test and set. + inline void bts(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_BTS, &dst, &src); + } + //! @brief Bit test and set. + inline void bts(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_BTS, &dst, &src); + } + //! @brief Bit test and set. + inline void bts(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_BTS, &dst, &src); + } + //! @brief Bit test and set. + inline void bts(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_BTS, &dst, &src); + } + + //! @brief Call Procedure. + inline void call(const GPReg& dst) + { + ASMJIT_ASSERT(dst.isRegType(REG_TYPE_GPN)); + _emitInstruction(INST_CALL, &dst); + } + //! @brief Call Procedure. + inline void call(const Mem& dst) + { + _emitInstruction(INST_CALL, &dst); + } + //! @brief Call Procedure. + inline void call(const Imm& dst) + { + _emitInstruction(INST_CALL, &dst); + } + //! @brief Call Procedure. + //! @overload + inline void call(void* dst) + { + Imm imm((sysint_t)dst); + _emitInstruction(INST_CALL, &imm); + } + + //! @brief Call Procedure. + inline void call(const Label& label) + { + _emitInstruction(INST_CALL, &label); + } + + //! @brief Convert Byte to Word (Sign Extend). + //! + //! AX <- Sign Extend AL + inline void cbw() + { + _emitInstruction(INST_CBW); + } + + //! @brief Convert Word to DWord (Sign Extend). + //! + //! EAX <- Sign Extend AX + inline void cwde() + { + _emitInstruction(INST_CWDE); + } + +#if defined(ASMJIT_X64) + //! @brief Convert DWord to QWord (Sign Extend). + //! + //! RAX <- Sign Extend EAX + inline void cdqe() + { + _emitInstruction(INST_CDQE); + } +#endif // ASMJIT_X64 + + //! @brief Clear Carry flag + //! + //! This instruction clears the CF flag in the EFLAGS register. 
+ inline void clc() + { + _emitInstruction(INST_CLC); + } + + //! @brief Clear Direction flag + //! + //! This instruction clears the DF flag in the EFLAGS register. + inline void cld() + { + _emitInstruction(INST_CLD); + } + + //! @brief Complement Carry Flag. + //! + //! This instruction complements the CF flag in the EFLAGS register. + //! (CF = NOT CF) + inline void cmc() + { + _emitInstruction(INST_CMC); + } + + //! @brief Conditional Move. + inline void cmov(CONDITION cc, const GPReg& dst, const GPReg& src) + { + _emitInstruction(ConditionToInstruction::toCMovCC(cc), &dst, &src); + } + + //! @brief Conditional Move. + inline void cmov(CONDITION cc, const GPReg& dst, const Mem& src) + { + _emitInstruction(ConditionToInstruction::toCMovCC(cc), &dst, &src); + } + + //! @brief Conditional Move. + inline void cmova (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVA , &dst, &src); } + //! @brief Conditional Move. + inline void cmova (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVA , &dst, &src); } + //! @brief Conditional Move. + inline void cmovae (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVAE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovae (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVAE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovb (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVB , &dst, &src); } + //! @brief Conditional Move. + inline void cmovb (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVB , &dst, &src); } + //! @brief Conditional Move. + inline void cmovbe (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVBE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovbe (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVBE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovc (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVC , &dst, &src); } + //! 
@brief Conditional Move. + inline void cmovc (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVC , &dst, &src); } + //! @brief Conditional Move. + inline void cmove (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVE , &dst, &src); } + //! @brief Conditional Move. + inline void cmove (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovg (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVG , &dst, &src); } + //! @brief Conditional Move. + inline void cmovg (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVG , &dst, &src); } + //! @brief Conditional Move. + inline void cmovge (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVGE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovge (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVGE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovl (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVL , &dst, &src); } + //! @brief Conditional Move. + inline void cmovl (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVL , &dst, &src); } + //! @brief Conditional Move. + inline void cmovle (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVLE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovle (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVLE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovna (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNA , &dst, &src); } + //! @brief Conditional Move. + inline void cmovna (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNA , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnae(const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNAE, &dst, &src); } + //! @brief Conditional Move. 
+ inline void cmovnae(const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNAE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovnb (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNB , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnb (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNB , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnbe(const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNBE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovnbe(const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNBE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovnc (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNC , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnc (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNC , &dst, &src); } + //! @brief Conditional Move. + inline void cmovne (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovne (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovng (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNG , &dst, &src); } + //! @brief Conditional Move. + inline void cmovng (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNG , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnge(const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNGE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovnge(const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNGE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovnl (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNL , &dst, &src); } + //! @brief Conditional Move. 
+ inline void cmovnl (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNL , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnle(const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNLE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovnle(const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNLE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovno (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNO , &dst, &src); } + //! @brief Conditional Move. + inline void cmovno (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNO , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnp (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNP , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnp (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNP , &dst, &src); } + //! @brief Conditional Move. + inline void cmovns (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNS , &dst, &src); } + //! @brief Conditional Move. + inline void cmovns (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNS , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnz (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVNZ , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnz (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVNZ , &dst, &src); } + //! @brief Conditional Move. + inline void cmovo (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVO , &dst, &src); } + //! @brief Conditional Move. + inline void cmovo (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVO , &dst, &src); } + //! @brief Conditional Move. + inline void cmovp (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVP , &dst, &src); } + //! @brief Conditional Move. 
+ inline void cmovp (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVP , &dst, &src); } + //! @brief Conditional Move. + inline void cmovpe (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVPE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovpe (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVPE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovpo (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVPO , &dst, &src); } + //! @brief Conditional Move. + inline void cmovpo (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVPO , &dst, &src); } + //! @brief Conditional Move. + inline void cmovs (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVS , &dst, &src); } + //! @brief Conditional Move. + inline void cmovs (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVS , &dst, &src); } + //! @brief Conditional Move. + inline void cmovz (const GPReg& dst, const GPReg& src) { _emitInstruction(INST_CMOVZ , &dst, &src); } + //! @brief Conditional Move. + inline void cmovz (const GPReg& dst, const Mem& src) { _emitInstruction(INST_CMOVZ , &dst, &src); } + + //! @brief Compare Two Operands. + inline void cmp(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_CMP, &dst, &src); + } + //! @brief Compare Two Operands. + inline void cmp(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_CMP, &dst, &src); + } + //! @brief Compare Two Operands. + inline void cmp(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_CMP, &dst, &src); + } + //! @brief Compare Two Operands. + inline void cmp(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_CMP, &dst, &src); + } + //! @brief Compare Two Operands. + inline void cmp(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_CMP, &dst, &src); + } + + //! @brief Compare and Exchange (i486). 
+ inline void cmpxchg(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_CMPXCHG, &dst, &src); + } + //! @brief Compare and Exchange (i486). + inline void cmpxchg(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_CMPXCHG, &dst, &src); + } + + //! @brief Compares the 64-bit value in EDX:EAX with the memory operand (Pentium). + //! + //! If the values are equal, then this instruction stores the 64-bit value + //! in ECX:EBX into the memory operand and sets the zero flag. Otherwise, + //! this instruction copies the 64-bit memory operand into the EDX:EAX + //! registers and clears the zero flag. + inline void cmpxchg8b(const Mem& dst) + { + _emitInstruction(INST_CMPXCHG8B, &dst); + } + +#if defined(ASMJIT_X64) + //! @brief Compares the 128-bit value in RDX:RAX with the memory operand (X64). + //! + //! If the values are equal, then this instruction stores the 128-bit value + //! in RCX:RBX into the memory operand and sets the zero flag. Otherwise, + //! this instruction copies the 128-bit memory operand into the RDX:RAX + //! registers and clears the zero flag. + inline void cmpxchg16b(const Mem& dst) + { + _emitInstruction(INST_CMPXCHG16B, &dst); + } +#endif // ASMJIT_X64 + + //! @brief CPU Identification (i486). + inline void cpuid() + { + _emitInstruction(INST_CPUID); + } + +#if defined(ASMJIT_X86) + //! @brief Decimal adjust AL after addition + //! + //! This instruction adjusts the sum of two packed BCD values to create + //! a packed BCD result. + //! + //! @note This instruction is only available in 32-bit mode. + inline void daa() + { + _emitInstruction(INST_DAA); + } +#endif // ASMJIT_X86 + +#if defined(ASMJIT_X86) + //! @brief Decimal adjust AL after subtraction + //! + //! This instruction adjusts the result of the subtraction of two packed + //! BCD values to create a packed BCD result. + //! + //! @note This instruction is only available in 32-bit mode. 
+ inline void das() + { + _emitInstruction(INST_DAS); + } +#endif // ASMJIT_X86 + + //! @brief Decrement by 1. + //! @note This instruction can be slower than sub(dst, 1) + inline void dec(const GPReg& dst) + { + _emitInstruction(INST_DEC, &dst); + } + //! @brief Decrement by 1. + //! @note This instruction can be slower than sub(dst, 1) + inline void dec(const Mem& dst) + { + _emitInstruction(INST_DEC, &dst); + } + + //! @brief Unsigned divide. + //! + //! This instruction divides (unsigned) the value in the AL, AX, or EAX + //! register by the source operand and stores the result in the AX, + //! DX:AX, or EDX:EAX registers. + inline void div(const GPReg& src) + { + _emitInstruction(INST_DIV, &src); + } + //! @brief Unsigned divide. + //! @overload + inline void div(const Mem& src) + { + _emitInstruction(INST_DIV, &src); + } + + //! @brief Make Stack Frame for Procedure Parameters. + inline void enter(const Imm& imm16, const Imm& imm8) + { + _emitInstruction(INST_ENTER, &imm16, &imm8); + } + + //! @brief Signed divide. + //! + //! This instruction divides (signed) the value in the AL, AX, or EAX + //! register by the source operand and stores the result in the AX, + //! DX:AX, or EDX:EAX registers. + inline void idiv(const GPReg& src) + { + _emitInstruction(INST_IDIV, &src); + } + //! @brief Signed divide. + //! @overload + inline void idiv(const Mem& src) + { + _emitInstruction(INST_IDIV, &src); + } + + //! @brief Signed multiply. + //! + //! Source operand (in a general-purpose register or memory location) + //! is multiplied by the value in the AL, AX, or EAX register (depending + //! on the operand size) and the product is stored in the AX, DX:AX, or + //! EDX:EAX registers, respectively. + inline void imul(const GPReg& src) + { + _emitInstruction(INST_IMUL, &src); + } + //! @overload + inline void imul(const Mem& src) + { + _emitInstruction(INST_IMUL, &src); + } + + //! @brief Signed multiply. + //! + //! 
Destination operand (the first operand) is multiplied by the source + //! operand (second operand). The destination operand is a general-purpose + //! register and the source operand is an immediate value, a general-purpose + //! register, or a memory location. The product is then stored in the + //! destination operand location. + inline void imul(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_IMUL, &dst, &src); + } + //! @brief Signed multiply. + //! @overload + inline void imul(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_IMUL, &dst, &src); + } + //! @brief Signed multiply. + //! @overload + inline void imul(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_IMUL, &dst, &src); + } + + //! @brief Signed multiply. + //! + //! source operand (which can be a general-purpose register or a memory + //! location) is multiplied by the second source operand (an immediate + //! value). The product is then stored in the destination operand + //! (a general-purpose register). + inline void imul(const GPReg& dst, const GPReg& src, const Imm& imm) + { + _emitInstruction(INST_IMUL, &dst, &src, &imm); + } + //! @overload + inline void imul(const GPReg& dst, const Mem& src, const Imm& imm) + { + _emitInstruction(INST_IMUL, &dst, &src, &imm); + } + + //! @brief Increment by 1. + //! @note This instruction can be slower than add(dst, 1) + inline void inc(const GPReg& dst) + { + _emitInstruction(INST_INC, &dst); + } + //! @brief Increment by 1. + //! @note This instruction can be slower than add(dst, 1) + inline void inc(const Mem& dst) + { + _emitInstruction(INST_INC, &dst); + } + + //! @brief Interrupt 3 - trap to debugger. + inline void int3() + { + _emitInstruction(INST_INT3); + } + + //! @brief Jump to label @a label if condition @a cc is met. + //! + //! This instruction checks the state of one or more of the status flags in + //! the EFLAGS register (CF, OF, PF, SF, and ZF) and, if the flags are in the + //! 
specified state (condition), performs a jump to the target instruction + //! specified by the destination operand. A condition code (cc) is associated + //! with each instruction to indicate the condition being tested for. If the + //! condition is not satisfied, the jump is not performed and execution + //! continues with the instruction following the Jcc instruction. + inline void j(CONDITION cc, const Label& label, uint32_t hint = HINT_NONE) + { + _emitJcc(ConditionToInstruction::toJCC(cc), &label, hint); + } + + //! @brief Jump to label @a label if condition is met. + inline void ja (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JA , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jae (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JAE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jb (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JB , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jbe (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JBE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jc (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JC , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void je (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jg (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JG , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jge (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JGE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jl (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JL , &label, hint); } + //! 
@brief Jump to label @a label if condition is met. + inline void jle (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JLE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jna (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNA , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnae(const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNAE, &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnb (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNB , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnbe(const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNBE, &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnc (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNC , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jne (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jng (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNG , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnge(const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNGE, &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnl (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNL , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnle(const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNLE, &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jno (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNO , &label, hint); } + //! @brief Jump to label @a label if condition is met. 
+ inline void jnp (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNP , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jns (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNS , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnz (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNZ , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jo (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JO , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jp (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JP , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jpe (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JPE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jpo (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JPO , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void js (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JS , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jz (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JZ , &label, hint); } + + //! @brief Short jump to label @a label if condition @a cc is met. + //! @sa j() + inline void short_j(CONDITION cc, const Label& label, uint32_t hint = HINT_NONE) + { + _emitOptions |= EMIT_OPTION_SHORT_JUMP; + j(cc, label, hint); + } + + //! @brief Short jump to label @a label if condition is met. + inline void short_ja (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JA , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jae (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JAE , &label, hint); } + //! 
@brief Short jump to label @a label if condition is met. + inline void short_jb (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JB , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jbe (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JBE , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jc (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JC , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_je (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JE , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jg (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JG , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jge (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JGE , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jl (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JL , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jle (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JLE , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jna (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNA , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jnae(const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNAE, &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jnb (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNB , &label, hint); } + //! 
@brief Short jump to label @a label if condition is met. + inline void short_jnbe(const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNBE, &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jnc (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNC , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jne (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNE , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jng (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNG , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jnge(const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNGE, &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jnl (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNL , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jnle(const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNLE, &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jno (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNO , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jnp (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNP , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jns (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNS , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jnz (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JNZ , &label, hint); } + //! 
@brief Short jump to label @a label if condition is met. + inline void short_jo (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JO , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jp (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JP , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jpe (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JPE , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jpo (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JPO , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_js (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JS , &label, hint); } + //! @brief Short jump to label @a label if condition is met. + inline void short_jz (const Label& label, uint32_t hint = HINT_NONE) { _emitShortJcc(INST_JZ , &label, hint); } + + //! @brief Jump. + //! @overload + inline void jmp(const GPReg& dst) + { + _emitInstruction(INST_JMP, &dst); + } + //! @brief Jump. + //! @overload + inline void jmp(const Mem& dst) + { + _emitInstruction(INST_JMP, &dst); + } + //! @brief Jump. + //! @overload + inline void jmp(const Imm& dst) + { + _emitInstruction(INST_JMP, &dst); + } + + //! @brief Jump. + //! @overload + inline void jmp(void* dst) + { + Imm imm((sysint_t)dst); + _emitInstruction(INST_JMP, &imm); + } + + //! @brief Jump. + //! + //! This instruction transfers program control to a different point + //! in the instruction stream without recording return information. + //! The destination (target) operand specifies the label of the + //! instruction being jumped to. + inline void jmp(const Label& label) + { + _emitInstruction(INST_JMP, &label); + } + + //! @brief Short jump. + //! 
@sa jmp() + inline void short_jmp(const Label& label) + { + _emitOptions |= EMIT_OPTION_SHORT_JUMP; + _emitInstruction(INST_JMP, &label); + } + + //! @brief Load Effective Address + //! + //! This instruction computes the effective address of the second + //! operand (the source operand) and stores it in the first operand + //! (destination operand). The source operand is a memory address + //! (offset part) specified with one of the processors addressing modes. + //! The destination operand is a general-purpose register. + inline void lea(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_LEA, &dst, &src); + } + + //! @brief High Level Procedure Exit. + inline void leave() + { + _emitInstruction(INST_LEAVE); + } + + //! @brief Move. + //! + //! This instruction copies the second operand (source operand) to the first + //! operand (destination operand). The source operand can be an immediate + //! value, general-purpose register, segment register, or memory location. + //! The destination register can be a general-purpose register, segment + //! register, or memory location. Both operands must be the same size, which + //! can be a byte, a word, or a DWORD. + //! + //! @note To move MMX or SSE registers to/from GP registers or memory, use + //! corresponding functions: @c movd(), @c movq(), etc. Passing MMX or SSE + //! registers to @c mov() is illegal. + inline void mov(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_MOV, &dst, &src); + } + //! @brief Move. + //! @overload + inline void mov(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_MOV, &dst, &src); + } + //! @brief Move. + //! @overload + inline void mov(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_MOV, &dst, &src); + } + //! @brief Move. + //! @overload + inline void mov(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_MOV, &dst, &src); + } + //! @brief Move. + //! 
@overload + inline void mov(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_MOV, &dst, &src); + } + + //! @brief Move byte, word, dword or qword from absolute address @a src to + //! AL, AX, EAX or RAX register. + inline void mov_ptr(const GPReg& dst, void* src) + { + ASMJIT_ASSERT(dst.getRegIndex() == 0); + Imm imm((sysint_t)src); + _emitInstruction(INST_MOV_PTR, &dst, &imm); + } + + //! @brief Move byte, word, dword or qword from AL, AX, EAX or RAX register + //! to absolute address @a dst. + inline void mov_ptr(void* dst, const GPReg& src) + { + ASMJIT_ASSERT(src.getRegIndex() == 0); + Imm imm((sysint_t)dst); + _emitInstruction(INST_MOV_PTR, &imm, &src); + } + + //! @brief Move with Sign-Extension. + //! + //! This instruction copies the contents of the source operand (register + //! or memory location) to the destination operand (register) and sign + //! extends the value to 16, 32 or 64-bits. + //! + //! @sa movsxd(). + void movsx(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_MOVSX, &dst, &src); + } + //! @brief Move with Sign-Extension. + //! @overload + void movsx(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVSX, &dst, &src); + } + +#if defined(ASMJIT_X64) + //! @brief Move DWord to QWord with sign-extension. + inline void movsxd(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_MOVSXD, &dst, &src); + } + //! @brief Move DWord to QWord with sign-extension. + //! @overload + inline void movsxd(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVSXD, &dst, &src); + } +#endif // ASMJIT_X64 + + //! @brief Move with Zero-Extend. + //! + //! This instruction copies the contents of the source operand (register + //! or memory location) to the destination operand (register) and zero + //! extends the value to 16 or 32-bits. The size of the converted value + //! depends on the operand-size attribute. 
+ inline void movzx(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_MOVZX, &dst, &src); + } + //! @brief Move with Zero-Extend. + //! @brief Overload + inline void movzx(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVZX, &dst, &src); + } + + //! @brief Unsigned multiply. + //! + //! Source operand (in a general-purpose register or memory location) + //! is multiplied by the value in the AL, AX, or EAX register (depending + //! on the operand size) and the product is stored in the AX, DX:AX, or + //! EDX:EAX registers, respectively. + inline void mul(const GPReg& src) + { + _emitInstruction(INST_MUL, &src); + } + //! @brief Unsigned multiply. + //! @overload + inline void mul(const Mem& src) + { + _emitInstruction(INST_MUL, &src); + } + + //! @brief Two's Complement Negation. + inline void neg(const GPReg& dst) + { + _emitInstruction(INST_NEG, &dst); + } + //! @brief Two's Complement Negation. + inline void neg(const Mem& dst) + { + _emitInstruction(INST_NEG, &dst); + } + + //! @brief No Operation. + //! + //! This instruction performs no operation. This instruction is a one-byte + //! instruction that takes up space in the instruction stream but does not + //! affect the machine context, except the EIP register. The NOP instruction + //! is an alias mnemonic for the XCHG (E)AX, (E)AX instruction. + inline void nop() + { + _emitInstruction(INST_NOP); + } + + //! @brief One's Complement Negation. + inline void not_(const GPReg& dst) + { + _emitInstruction(INST_NOT, &dst); + } + //! @brief One's Complement Negation. + inline void not_(const Mem& dst) + { + _emitInstruction(INST_NOT, &dst); + } + + //! @brief Logical Inclusive OR. + inline void or_(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_OR, &dst, &src); + } + //! @brief Logical Inclusive OR. + inline void or_(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_OR, &dst, &src); + } + //! @brief Logical Inclusive OR. 
+ inline void or_(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_OR, &dst, &src); + } + //! @brief Logical Inclusive OR. + inline void or_(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_OR, &dst, &src); + } + //! @brief Logical Inclusive OR. + inline void or_(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_OR, &dst, &src); + } + + //! @brief Pop a Value from the Stack. + //! + //! This instruction loads the value from the top of the stack to the location + //! specified with the destination operand and then increments the stack pointer. + //! The destination operand can be a general purpose register, memory location, + //! or segment register. + inline void pop(const GPReg& dst) + { + ASMJIT_ASSERT(dst.isRegType(REG_TYPE_GPW) || dst.isRegType(REG_TYPE_GPN)); + _emitInstruction(INST_POP, &dst); + } + + inline void pop(const Mem& dst) + { + ASMJIT_ASSERT(dst.getSize() == 2 || dst.getSize() == sizeof(sysint_t)); + _emitInstruction(INST_POP, &dst); + } + +#if defined(ASMJIT_X86) + //! @brief Pop All General-Purpose Registers. + //! + //! Pop EDI, ESI, EBP, EBX, EDX, ECX, and EAX. + inline void popad() + { + _emitInstruction(INST_POPAD); + } +#endif // ASMJIT_X86 + + //! @brief Pop Stack into EFLAGS Register (32-bit or 64-bit). + inline void popf() + { +#if defined(ASMJIT_X86) + popfd(); +#else + popfq(); +#endif + } + +#if defined(ASMJIT_X86) + //! @brief Pop Stack into EFLAGS Register (32-bit). + inline void popfd() { _emitInstruction(INST_POPFD); } +#else + //! @brief Pop Stack into EFLAGS Register (64-bit). + inline void popfq() { _emitInstruction(INST_POPFQ); } +#endif + + //! @brief Push WORD/DWORD/QWORD Onto the Stack. + //! + //! @note 32-bit architecture pushed DWORD while 64-bit + //! pushes QWORD. 64-bit mode not provides instruction to + //! push 32-bit register/memory. 
+ inline void push(const GPReg& src) + { + ASMJIT_ASSERT(src.isRegType(REG_TYPE_GPW) || src.isRegType(REG_TYPE_GPN)); + _emitInstruction(INST_PUSH, &src); + } + //! @brief Push WORD/DWORD/QWORD Onto the Stack. + inline void push(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == sizeof(sysint_t)); + _emitInstruction(INST_PUSH, &src); + } + //! @brief Push WORD/DWORD/QWORD Onto the Stack. + inline void push(const Imm& src) + { + _emitInstruction(INST_PUSH, &src); + } + +#if defined(ASMJIT_X86) + //! @brief Push All General-Purpose Registers. + //! + //! Push EAX, ECX, EDX, EBX, original ESP, EBP, ESI, and EDI. + inline void pushad() + { + _emitInstruction(INST_PUSHAD); + } +#endif // ASMJIT_X86 + + //! @brief Push EFLAGS Register (32-bit or 64-bit) onto the Stack. + inline void pushf() + { +#if defined(ASMJIT_X86) + pushfd(); +#else + pushfq(); +#endif + } + +#if defined(ASMJIT_X86) + //! @brief Push EFLAGS Register (32-bit) onto the Stack. + inline void pushfd() { _emitInstruction(INST_PUSHFD); } +#else + //! @brief Push EFLAGS Register (64-bit) onto the Stack. + inline void pushfq() { _emitInstruction(INST_PUSHFQ); } +#endif // ASMJIT_X86 + + //! @brief Rotate Bits Left. + //! @note @a src register can be only @c cl. + inline void rcl(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_RCL, &dst, &src); + } + //! @brief Rotate Bits Left. + inline void rcl(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_RCL, &dst, &src); + } + //! @brief Rotate Bits Left. + //! @note @a src register can be only @c cl. + inline void rcl(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_RCL, &dst, &src); + } + //! @brief Rotate Bits Left. + inline void rcl(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_RCL, &dst, &src); + } + + //! @brief Rotate Bits Right. + //! @note @a src register can be only @c cl. 
+ inline void rcr(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_RCR, &dst, &src); + } + //! @brief Rotate Bits Right. + inline void rcr(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_RCR, &dst, &src); + } + //! @brief Rotate Bits Right. + //! @note @a src register can be only @c cl. + inline void rcr(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_RCR, &dst, &src); + } + //! @brief Rotate Bits Right. + inline void rcr(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_RCR, &dst, &src); + } + + //! @brief Read Time-Stamp Counter (Pentium). + inline void rdtsc() + { + _emitInstruction(INST_RDTSC); + } + + //! @brief Read Time-Stamp Counter and Processor ID (New). + inline void rdtscp() + { + _emitInstruction(INST_RDTSCP); + } + + //! @brief Load ECX/RCX BYTEs from DS:[ESI/RSI] to AL. + inline void rep_lodsb() + { + _emitInstruction(INST_REP_LODSB); + } + + //! @brief Load ECX/RCX DWORDs from DS:[ESI/RSI] to EAX. + inline void rep_lodsd() + { + _emitInstruction(INST_REP_LODSD); + } + +#if defined(ASMJIT_X64) + //! @brief Load ECX/RCX QWORDs from DS:[ESI/RSI] to RAX. + inline void rep_lodsq() + { + _emitInstruction(INST_REP_LODSQ); + } +#endif // ASMJIT_X64 + + //! @brief Load ECX/RCX WORDs from DS:[ESI/RSI] to AX. + inline void rep_lodsw() + { + _emitInstruction(INST_REP_LODSW); + } + + //! @brief Move ECX/RCX BYTEs from DS:[ESI/RSI] to ES:[EDI/RDI]. + inline void rep_movsb() + { + _emitInstruction(INST_REP_MOVSB); + } + + //! @brief Move ECX/RCX DWORDs from DS:[ESI/RSI] to ES:[EDI/RDI]. + inline void rep_movsd() + { + _emitInstruction(INST_REP_MOVSD); + } + +#if defined(ASMJIT_X64) + //! @brief Move ECX/RCX QWORDs from DS:[ESI/RSI] to ES:[EDI/RDI]. + inline void rep_movsq() + { + _emitInstruction(INST_REP_MOVSQ); + } +#endif // ASMJIT_X64 + + //! @brief Move ECX/RCX WORDs from DS:[ESI/RSI] to ES:[EDI/RDI]. + inline void rep_movsw() + { + _emitInstruction(INST_REP_MOVSW); + } + + //! 
@brief Fill ECX/RCX BYTEs at ES:[EDI/RDI] with AL. + inline void rep_stosb() + { + _emitInstruction(INST_REP_STOSB); + } + + //! @brief Fill ECX/RCX DWORDs at ES:[EDI/RDI] with EAX. + inline void rep_stosd() + { + _emitInstruction(INST_REP_STOSD); + } + +#if defined(ASMJIT_X64) + //! @brief Fill ECX/RCX QWORDs at ES:[EDI/RDI] with RAX. + inline void rep_stosq() + { + _emitInstruction(INST_REP_STOSQ); + } +#endif // ASMJIT_X64 + + //! @brief Fill ECX/RCX WORDs at ES:[EDI/RDI] with AX. + inline void rep_stosw() + { + _emitInstruction(INST_REP_STOSW); + } + + //! @brief Repeated find nonmatching BYTEs in ES:[EDI/RDI] and DS:[ESI/RDI]. + inline void repe_cmpsb() + { + _emitInstruction(INST_REPE_CMPSB); + } + + //! @brief Repeated find nonmatching DWORDs in ES:[EDI/RDI] and DS:[ESI/RDI]. + inline void repe_cmpsd() + { + _emitInstruction(INST_REPE_CMPSD); + } + +#if defined(ASMJIT_X64) + //! @brief Repeated find nonmatching QWORDs in ES:[EDI/RDI] and DS:[ESI/RDI]. + inline void repe_cmpsq() + { + _emitInstruction(INST_REPE_CMPSQ); + } +#endif // ASMJIT_X64 + + //! @brief Repeated find nonmatching WORDs in ES:[EDI/RDI] and DS:[ESI/RDI]. + inline void repe_cmpsw() + { + _emitInstruction(INST_REPE_CMPSW); + } + + //! @brief Find non-AL BYTE starting at ES:[EDI/RDI]. + inline void repe_scasb() + { + _emitInstruction(INST_REPE_SCASB); + } + + //! @brief Find non-EAX DWORD starting at ES:[EDI/RDI]. + inline void repe_scasd() + { + _emitInstruction(INST_REPE_SCASD); + } + +#if defined(ASMJIT_X64) + //! @brief Find non-RAX QWORD starting at ES:[EDI/RDI]. + inline void repe_scasq() + { + _emitInstruction(INST_REPE_SCASQ); + } +#endif // ASMJIT_X64 + + //! @brief Find non-AX WORD starting at ES:[EDI/RDI]. + inline void repe_scasw() + { + _emitInstruction(INST_REPE_SCASW); + } + + //! @brief Repeated find nonmatching BYTEs in ES:[EDI/RDI] and DS:[ESI/RDI]. + inline void repne_cmpsb() + { + _emitInstruction(INST_REPNE_CMPSB); + } + + //! 
@brief Repeated find nonmatching DWORDs in ES:[EDI/RDI] and DS:[ESI/RDI]. + inline void repne_cmpsd() + { + _emitInstruction(INST_REPNE_CMPSD); + } + +#if defined(ASMJIT_X64) + //! @brief Repeated find nonmatching QWORDs in ES:[EDI/RDI] and DS:[ESI/RDI]. + inline void repne_cmpsq() + { + _emitInstruction(INST_REPNE_CMPSQ); + } +#endif // ASMJIT_X64 + + //! @brief Repeated find nonmatching WORDs in ES:[EDI/RDI] and DS:[ESI/RDI]. + inline void repne_cmpsw() + { + _emitInstruction(INST_REPNE_CMPSW); + } + + //! @brief Find AL, starting at ES:[EDI/RDI]. + inline void repne_scasb() + { + _emitInstruction(INST_REPNE_SCASB); + } + + //! @brief Find EAX, starting at ES:[EDI/RDI]. + inline void repne_scasd() + { + _emitInstruction(INST_REPNE_SCASD); + } + +#if defined(ASMJIT_X64) + //! @brief Find RAX, starting at ES:[EDI/RDI]. + inline void repne_scasq() + { + _emitInstruction(INST_REPNE_SCASQ); + } +#endif // ASMJIT_X64 + + //! @brief Find AX, starting at ES:[EDI/RDI]. + inline void repne_scasw() + { + _emitInstruction(INST_REPNE_SCASW); + } + + //! @brief Return from Procedure. + inline void ret() + { + _emitInstruction(INST_RET); + } + + //! @brief Return from Procedure. + inline void ret(const Imm& imm16) + { + _emitInstruction(INST_RET, &imm16); + } + + //! @brief Rotate Bits Left. + //! @note @a src register can be only @c cl. + inline void rol(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_ROL, &dst, &src); + } + //! @brief Rotate Bits Left. + inline void rol(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_ROL, &dst, &src); + } + //! @brief Rotate Bits Left. + //! @note @a src register can be only @c cl. + inline void rol(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_ROL, &dst, &src); + } + //! @brief Rotate Bits Left. + inline void rol(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_ROL, &dst, &src); + } + + //! @brief Rotate Bits Right. + //! @note @a src register can be only @c cl. 
+ inline void ror(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_ROR, &dst, &src); + } + //! @brief Rotate Bits Right. + inline void ror(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_ROR, &dst, &src); + } + //! @brief Rotate Bits Right. + //! @note @a src register can be only @c cl. + inline void ror(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_ROR, &dst, &src); + } + //! @brief Rotate Bits Right. + inline void ror(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_ROR, &dst, &src); + } + +#if defined(ASMJIT_X86) + //! @brief Store AH into Flags. + inline void sahf() + { + _emitInstruction(INST_SAHF); + } +#endif // ASMJIT_X86 + + //! @brief Integer subtraction with borrow. + inline void sbb(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_SBB, &dst, &src); + } + //! @brief Integer subtraction with borrow. + inline void sbb(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_SBB, &dst, &src); + } + //! @brief Integer subtraction with borrow. + inline void sbb(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_SBB, &dst, &src); + } + //! @brief Integer subtraction with borrow. + inline void sbb(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_SBB, &dst, &src); + } + //! @brief Integer subtraction with borrow. + inline void sbb(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_SBB, &dst, &src); + } + + //! @brief Shift Bits Left. + //! @note @a src register can be only @c cl. + inline void sal(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_SAL, &dst, &src); + } + //! @brief Shift Bits Left. + inline void sal(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_SAL, &dst, &src); + } + //! @brief Shift Bits Left. + //! @note @a src register can be only @c cl. + inline void sal(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_SAL, &dst, &src); + } + //! @brief Shift Bits Left. 
+ inline void sal(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_SAL, &dst, &src); + } + + //! @brief Shift Bits Right. + //! @note @a src register can be only @c cl. + inline void sar(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_SAR, &dst, &src); + } + //! @brief Shift Bits Right. + inline void sar(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_SAR, &dst, &src); + } + //! @brief Shift Bits Right. + //! @note @a src register can be only @c cl. + inline void sar(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_SAR, &dst, &src); + } + //! @brief Shift Bits Right. + inline void sar(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_SAR, &dst, &src); + } + + //! @brief Set Byte on Condition. + inline void set(CONDITION cc, const GPReg& dst) + { + ASMJIT_ASSERT(dst.getSize() == 1); + _emitInstruction(ConditionToInstruction::toSetCC(cc), &dst); + } + + //! @brief Set Byte on Condition. + inline void set(CONDITION cc, const Mem& dst) + { + ASMJIT_ASSERT(dst.getSize() <= 1); + _emitInstruction(ConditionToInstruction::toSetCC(cc), &dst); + } + + //! @brief Set Byte on Condition. + inline void seta (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETA , &dst); } + //! @brief Set Byte on Condition. + inline void seta (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETA , &dst); } + //! @brief Set Byte on Condition. + inline void setae (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETAE , &dst); } + //! @brief Set Byte on Condition. + inline void setae (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETAE , &dst); } + //! @brief Set Byte on Condition. + inline void setb (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETB , &dst); } + //! @brief Set Byte on Condition. 
+ inline void setb (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETB , &dst); } + //! @brief Set Byte on Condition. + inline void setbe (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETBE , &dst); } + //! @brief Set Byte on Condition. + inline void setbe (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETBE , &dst); } + //! @brief Set Byte on Condition. + inline void setc (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETC , &dst); } + //! @brief Set Byte on Condition. + inline void setc (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETC , &dst); } + //! @brief Set Byte on Condition. + inline void sete (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETE , &dst); } + //! @brief Set Byte on Condition. + inline void sete (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETE , &dst); } + //! @brief Set Byte on Condition. + inline void setg (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETG , &dst); } + //! @brief Set Byte on Condition. + inline void setg (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETG , &dst); } + //! @brief Set Byte on Condition. + inline void setge (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETGE , &dst); } + //! @brief Set Byte on Condition. + inline void setge (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETGE , &dst); } + //! @brief Set Byte on Condition. + inline void setl (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETL , &dst); } + //! @brief Set Byte on Condition. + inline void setl (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETL , &dst); } + //! @brief Set Byte on Condition. 
+ inline void setle (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETLE , &dst); } + //! @brief Set Byte on Condition. + inline void setle (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETLE , &dst); } + //! @brief Set Byte on Condition. + inline void setna (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNA , &dst); } + //! @brief Set Byte on Condition. + inline void setna (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNA , &dst); } + //! @brief Set Byte on Condition. + inline void setnae(const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNAE, &dst); } + //! @brief Set Byte on Condition. + inline void setnae(const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNAE, &dst); } + //! @brief Set Byte on Condition. + inline void setnb (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNB , &dst); } + //! @brief Set Byte on Condition. + inline void setnb (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNB , &dst); } + //! @brief Set Byte on Condition. + inline void setnbe(const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNBE, &dst); } + //! @brief Set Byte on Condition. + inline void setnbe(const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNBE, &dst); } + //! @brief Set Byte on Condition. + inline void setnc (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNC , &dst); } + //! @brief Set Byte on Condition. + inline void setnc (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNC , &dst); } + //! @brief Set Byte on Condition. + inline void setne (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNE , &dst); } + //! @brief Set Byte on Condition. 
+ inline void setne (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNE , &dst); } + //! @brief Set Byte on Condition. + inline void setng (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNG , &dst); } + //! @brief Set Byte on Condition. + inline void setng (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNG , &dst); } + //! @brief Set Byte on Condition. + inline void setnge(const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNGE, &dst); } + //! @brief Set Byte on Condition. + inline void setnge(const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNGE, &dst); } + //! @brief Set Byte on Condition. + inline void setnl (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNL , &dst); } + //! @brief Set Byte on Condition. + inline void setnl (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNL , &dst); } + //! @brief Set Byte on Condition. + inline void setnle(const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNLE, &dst); } + //! @brief Set Byte on Condition. + inline void setnle(const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNLE, &dst); } + //! @brief Set Byte on Condition. + inline void setno (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNO , &dst); } + //! @brief Set Byte on Condition. + inline void setno (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNO , &dst); } + //! @brief Set Byte on Condition. + inline void setnp (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNP , &dst); } + //! @brief Set Byte on Condition. + inline void setnp (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNP , &dst); } + //! @brief Set Byte on Condition. 
+ inline void setns (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNS , &dst); } + //! @brief Set Byte on Condition. + inline void setns (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNS , &dst); } + //! @brief Set Byte on Condition. + inline void setnz (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNZ , &dst); } + //! @brief Set Byte on Condition. + inline void setnz (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNZ , &dst); } + //! @brief Set Byte on Condition. + inline void seto (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETO , &dst); } + //! @brief Set Byte on Condition. + inline void seto (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETO , &dst); } + //! @brief Set Byte on Condition. + inline void setp (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETP , &dst); } + //! @brief Set Byte on Condition. + inline void setp (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETP , &dst); } + //! @brief Set Byte on Condition. + inline void setpe (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETPE , &dst); } + //! @brief Set Byte on Condition. + inline void setpe (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETPE , &dst); } + //! @brief Set Byte on Condition. + inline void setpo (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETPO , &dst); } + //! @brief Set Byte on Condition. + inline void setpo (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETPO , &dst); } + //! @brief Set Byte on Condition. + inline void sets (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETS , &dst); } + //! @brief Set Byte on Condition. 
+ inline void sets (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETS , &dst); } + //! @brief Set Byte on Condition. + inline void setz (const GPReg& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETZ , &dst); } + //! @brief Set Byte on Condition. + inline void setz (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETZ , &dst); } + + //! @brief Shift Bits Left. + //! @note @a src register can be only @c cl. + inline void shl(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_SHL, &dst, &src); + } + //! @brief Shift Bits Left. + inline void shl(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_SHL, &dst, &src); + } + //! @brief Shift Bits Left. + //! @note @a src register can be only @c cl. + inline void shl(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_SHL, &dst, &src); + } + //! @brief Shift Bits Left. + inline void shl(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_SHL, &dst, &src); + } + + //! @brief Shift Bits Right. + //! @note @a src register can be only @c cl. + inline void shr(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_SHR, &dst, &src); + } + //! @brief Shift Bits Right. + inline void shr(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_SHR, &dst, &src); + } + //! @brief Shift Bits Right. + //! @note @a src register can be only @c cl. + inline void shr(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_SHR, &dst, &src); + } + //! @brief Shift Bits Right. + inline void shr(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_SHR, &dst, &src); + } + + //! @brief Double Precision Shift Left. + //! @note src2 register can be only @c cl register. + inline void shld(const GPReg& dst, const GPReg& src1, const GPReg& src2) + { + _emitInstruction(INST_SHLD, &dst, &src1, &src2); + } + //! @brief Double Precision Shift Left. 
+ inline void shld(const GPReg& dst, const GPReg& src1, const Imm& src2) + { + _emitInstruction(INST_SHLD, &dst, &src1, &src2); + } + //! @brief Double Precision Shift Left. + //! @note src2 register can be only @c cl register. + inline void shld(const Mem& dst, const GPReg& src1, const GPReg& src2) + { + _emitInstruction(INST_SHLD, &dst, &src1, &src2); + } + //! @brief Double Precision Shift Left. + inline void shld(const Mem& dst, const GPReg& src1, const Imm& src2) + { + _emitInstruction(INST_SHLD, &dst, &src1, &src2); + } + + //! @brief Double Precision Shift Right. + //! @note src2 register can be only @c cl register. + inline void shrd(const GPReg& dst, const GPReg& src1, const GPReg& src2) + { + _emitInstruction(INST_SHRD, &dst, &src1, &src2); + } + //! @brief Double Precision Shift Right. + inline void shrd(const GPReg& dst, const GPReg& src1, const Imm& src2) + { + _emitInstruction(INST_SHRD, &dst, &src1, &src2); + } + //! @brief Double Precision Shift Right. + //! @note src2 register can be only @c cl register. + inline void shrd(const Mem& dst, const GPReg& src1, const GPReg& src2) + { + _emitInstruction(INST_SHRD, &dst, &src1, &src2); + } + //! @brief Double Precision Shift Right. + inline void shrd(const Mem& dst, const GPReg& src1, const Imm& src2) + { + _emitInstruction(INST_SHRD, &dst, &src1, &src2); + } + + //! @brief Set Carry Flag to 1. + inline void stc() + { + _emitInstruction(INST_STC); + } + + //! @brief Set Direction Flag to 1. + inline void std() + { + _emitInstruction(INST_STD); + } + + //! @brief Subtract. + inline void sub(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_SUB, &dst, &src); + } + //! @brief Subtract. + inline void sub(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_SUB, &dst, &src); + } + //! @brief Subtract. + inline void sub(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_SUB, &dst, &src); + } + //! @brief Subtract. 
+ inline void sub(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_SUB, &dst, &src); + } + //! @brief Subtract. + inline void sub(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_SUB, &dst, &src); + } + + //! @brief Logical Compare. + inline void test(const GPReg& op1, const GPReg& op2) + { + _emitInstruction(INST_TEST, &op1, &op2); + } + //! @brief Logical Compare. + inline void test(const GPReg& op1, const Imm& op2) + { + _emitInstruction(INST_TEST, &op1, &op2); + } + //! @brief Logical Compare. + inline void test(const Mem& op1, const GPReg& op2) + { + _emitInstruction(INST_TEST, &op1, &op2); + } + //! @brief Logical Compare. + inline void test(const Mem& op1, const Imm& op2) + { + _emitInstruction(INST_TEST, &op1, &op2); + } + + //! @brief Undefined instruction - Raise invalid opcode exception. + inline void ud2() + { + _emitInstruction(INST_UD2); + } + + //! @brief Exchange and Add. + inline void xadd(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_XADD, &dst, &src); + } + //! @brief Exchange and Add. + inline void xadd(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_XADD, &dst, &src); + } + + //! @brief Exchange Register/Memory with Register. + inline void xchg(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_XCHG, &dst, &src); + } + //! @brief Exchange Register/Memory with Register. + inline void xchg(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_XCHG, &dst, &src); + } + //! @brief Exchange Register/Memory with Register. + inline void xchg(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_XCHG, &src, &dst); + } + + //! @brief Exchange Register/Memory with Register. + inline void xor_(const GPReg& dst, const GPReg& src) + { + _emitInstruction(INST_XOR, &dst, &src); + } + //! @brief Exchange Register/Memory with Register. + inline void xor_(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_XOR, &dst, &src); + } + //! 
@brief Exchange Register/Memory with Register. + inline void xor_(const GPReg& dst, const Imm& src) + { + _emitInstruction(INST_XOR, &dst, &src); + } + //! @brief Exchange Register/Memory with Register. + inline void xor_(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_XOR, &dst, &src); + } + //! @brief Exchange Register/Memory with Register. + inline void xor_(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_XOR, &dst, &src); + } + + // -------------------------------------------------------------------------- + // [X87 Instructions (FPU)] + // -------------------------------------------------------------------------- + + //! @brief Compute 2^x - 1 (FPU). + inline void f2xm1() + { + _emitInstruction(INST_F2XM1); + } + + //! @brief Absolute Value of st(0) (FPU). + inline void fabs() + { + _emitInstruction(INST_FABS); + } + + //! @brief Add @a src to @a dst and store result in @a dst (FPU). + //! + //! @note One of dst or src must be st(0). + inline void fadd(const X87Reg& dst, const X87Reg& src) + { + ASMJIT_ASSERT(dst.getRegIndex() == 0 || src.getRegIndex() == 0); + _emitInstruction(INST_FADD, &dst, &src); + } + + //! @brief Add @a src to st(0) and store result in st(0) (FPU). + //! + //! @note SP-FP or DP-FP determined by @a adr size. + inline void fadd(const Mem& src) + { + _emitInstruction(INST_FADD, &src); + } + + //! @brief Add st(0) to @a dst and POP register stack (FPU). + inline void faddp(const X87Reg& dst = st(1)) + { + _emitInstruction(INST_FADDP, &dst); + } + + //! @brief Load Binary Coded Decimal (FPU). + inline void fbld(const Mem& src) + { + _emitInstruction(INST_FBLD, &src); + } + + //! @brief Store BCD Integer and Pop (FPU). + inline void fbstp(const Mem& dst) + { + _emitInstruction(INST_FBSTP, &dst); + } + + //! @brief Change st(0) Sign (FPU). + inline void fchs() + { + _emitInstruction(INST_FCHS); + } + + //! @brief Clear Exceptions (FPU). + //! + //! 
Clear floating-point exception flags after checking for pending unmasked + //! floating-point exceptions. + //! + //! Clears the floating-point exception flags (PE, UE, OE, ZE, DE, and IE), + //! the exception summary status flag (ES), the stack fault flag (SF), and + //! the busy flag (B) in the FPU status word. The FCLEX instruction checks + //! for and handles any pending unmasked floating-point exceptions before + //! clearing the exception flags. + inline void fclex() + { + _emitInstruction(INST_FCLEX); + } + + //! @brief FP Conditional Move (FPU). + inline void fcmovb(const X87Reg& src) + { + _emitInstruction(INST_FCMOVB, &src); + } + //! @brief FP Conditional Move (FPU). + inline void fcmovbe(const X87Reg& src) + { + _emitInstruction(INST_FCMOVBE, &src); + } + //! @brief FP Conditional Move (FPU). + inline void fcmove(const X87Reg& src) + { + _emitInstruction(INST_FCMOVE, &src); + } + //! @brief FP Conditional Move (FPU). + inline void fcmovnb(const X87Reg& src) + { + _emitInstruction(INST_FCMOVNB, &src); + } + //! @brief FP Conditional Move (FPU). + inline void fcmovnbe(const X87Reg& src) + { + _emitInstruction(INST_FCMOVNBE, &src); + } + //! @brief FP Conditional Move (FPU). + inline void fcmovne(const X87Reg& src) + { + _emitInstruction(INST_FCMOVNE, &src); + } + //! @brief FP Conditional Move (FPU). + inline void fcmovnu(const X87Reg& src) + { + _emitInstruction(INST_FCMOVNU, &src); + } + //! @brief FP Conditional Move (FPU). + inline void fcmovu(const X87Reg& src) + { + _emitInstruction(INST_FCMOVU, &src); + } + + //! @brief Compare st(0) with @a reg (FPU). + inline void fcom(const X87Reg& reg = st(1)) + { + _emitInstruction(INST_FCOM, ®); + } + //! @brief Compare st(0) with 4-byte or 8-byte FP at @a src (FPU). + inline void fcom(const Mem& src) + { + _emitInstruction(INST_FCOM, &src); + } + + //! @brief Compare st(0) with @a reg and pop the stack (FPU). + inline void fcomp(const X87Reg& reg = st(1)) + { + _emitInstruction(INST_FCOMP, ®); + } + //! 
@brief Compare st(0) with 4-byte or 8-byte FP at @a adr and pop the + //! stack (FPU). + inline void fcomp(const Mem& mem) + { + _emitInstruction(INST_FCOMP, &mem); + } + + //! @brief Compare st(0) with st(1) and pop register stack twice (FPU). + inline void fcompp() + { + _emitInstruction(INST_FCOMPP); + } + + //! @brief Compare st(0) and @a reg and Set EFLAGS (FPU). + inline void fcomi(const X87Reg& reg) + { + _emitInstruction(INST_FCOMI, ®); + } + + //! @brief Compare st(0) and @a reg and Set EFLAGS and pop the stack (FPU). + inline void fcomip(const X87Reg& reg) + { + _emitInstruction(INST_FCOMIP, ®); + } + + //! @brief Cosine (FPU). + //! + //! This instruction calculates the cosine of the source operand in + //! register st(0) and stores the result in st(0). + inline void fcos() + { + _emitInstruction(INST_FCOS); + } + + //! @brief Decrement Stack-Top Pointer (FPU). + //! + //! Subtracts one from the TOP field of the FPU status word (decrements + //! the top-ofstack pointer). If the TOP field contains a 0, it is set + //! to 7. The effect of this instruction is to rotate the stack by one + //! position. The contents of the FPU data registers and tag register + //! are not affected. + inline void fdecstp() + { + _emitInstruction(INST_FDECSTP); + } + + //! @brief Divide @a dst by @a src (FPU). + //! + //! @note One of @a dst or @a src register must be st(0). + inline void fdiv(const X87Reg& dst, const X87Reg& src) + { + ASMJIT_ASSERT(dst.getRegIndex() == 0 || src.getRegIndex() == 0); + _emitInstruction(INST_FDIV, &dst, &src); + } + //! @brief Divide st(0) by 32-bit or 64-bit FP value (FPU). + inline void fdiv(const Mem& src) + { + _emitInstruction(INST_FDIV, &src); + } + + //! @brief Divide @a reg by st(0) (FPU). + inline void fdivp(const X87Reg& reg = st(1)) + { + _emitInstruction(INST_FDIVP, ®); + } + + //! @brief Reverse Divide @a dst by @a src (FPU). + //! + //! @note One of @a dst or @a src register must be st(0). 
+ inline void fdivr(const X87Reg& dst, const X87Reg& src) + { + ASMJIT_ASSERT(dst.getRegIndex() == 0 || src.getRegIndex() == 0); + _emitInstruction(INST_FDIVR, &dst, &src); + } + //! @brief Reverse Divide st(0) by 32-bit or 64-bit FP value (FPU). + inline void fdivr(const Mem& src) + { + _emitInstruction(INST_FDIVR, &src); + } + + //! @brief Reverse Divide @a reg by st(0) (FPU). + inline void fdivrp(const X87Reg& reg = st(1)) + { + _emitInstruction(INST_FDIVRP, ®); + } + + //! @brief Free Floating-Point Register (FPU). + //! + //! Sets the tag in the FPU tag register associated with register @a reg + //! to empty (11B). The contents of @a reg and the FPU stack-top pointer + //! (TOP) are not affected. + inline void ffree(const X87Reg& reg) + { + _emitInstruction(INST_FFREE, ®); + } + + //! @brief Add 16-bit or 32-bit integer to st(0) (FPU). + inline void fiadd(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4); + _emitInstruction(INST_FIADD, &src); + } + + //! @brief Compare st(0) with 16-bit or 32-bit Integer (FPU). + inline void ficom(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4); + _emitInstruction(INST_FICOM, &src); + } + + //! @brief Compare st(0) with 16-bit or 32-bit Integer and pop the stack (FPU). + inline void ficomp(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4); + _emitInstruction(INST_FICOMP, &src); + } + + //! @brief Divide st(0) by 32-bit or 16-bit integer (@a src) (FPU). + inline void fidiv(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4); + _emitInstruction(INST_FIDIV, &src); + } + + //! @brief Reverse Divide st(0) by 32-bit or 16-bit integer (@a src) (FPU). + inline void fidivr(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4); + _emitInstruction(INST_FIDIVR, &src); + } + + //! @brief Load 16-bit, 32-bit or 64-bit Integer and push it to the stack (FPU). + //! + //! 
Converts the signed-integer source operand into double extended-precision + //! floating point format and pushes the value onto the FPU register stack. + //! The source operand can be a word, doubleword, or quadword integer. It is + //! loaded without rounding errors. The sign of the source operand is + //! preserved. + inline void fild(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4 || src.getSize() == 8); + _emitInstruction(INST_FILD, &src); + } + + //! @brief Multiply st(0) by 16-bit or 32-bit integer and store it + //! to st(0) (FPU). + inline void fimul(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4); + _emitInstruction(INST_FIMUL, &src); + } + + //! @brief Increment Stack-Top Pointer (FPU). + //! + //! Adds one to the TOP field of the FPU status word (increments the + //! top-of-stack pointer). If the TOP field contains a 7, it is set to 0. + //! The effect of this instruction is to rotate the stack by one position. + //! The contents of the FPU data registers and tag register are not affected. + //! This operation is not equivalent to popping the stack, because the tag + //! for the previous top-of-stack register is not marked empty. + inline void fincstp() + { + _emitInstruction(INST_FINCSTP); + } + + //! @brief Initialize Floating-Point Unit (FPU). + //! + //! Initialize FPU after checking for pending unmasked floating-point + //! exceptions. + inline void finit() + { + _emitInstruction(INST_FINIT); + } + + //! @brief Subtract 16-bit or 32-bit integer from st(0) and store result to + //! st(0) (FPU). + inline void fisub(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4); + _emitInstruction(INST_FISUB, &src); + } + + //! @brief Reverse Subtract 16-bit or 32-bit integer from st(0) and + //! store result to st(0) (FPU). + inline void fisubr(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == 4); + _emitInstruction(INST_FISUBR, &src); + } + + //! 
@brief Initialize Floating-Point Unit (FPU). + //! + //! Initialize FPU without checking for pending unmasked floating-point + //! exceptions. + inline void fninit() + { + _emitInstruction(INST_FNINIT); + } + + //! @brief Store st(0) as 16-bit or 32-bit Integer to @a dst (FPU). + inline void fist(const Mem& dst) + { + ASMJIT_ASSERT(dst.getSize() == 2 || dst.getSize() == 4); + _emitInstruction(INST_FIST, &dst); + } + + //! @brief Store st(0) as 16-bit, 32-bit or 64-bit Integer to @a dst and pop + //! stack (FPU). + inline void fistp(const Mem& dst) + { + ASMJIT_ASSERT(dst.getSize() == 2 || dst.getSize() == 4 || dst.getSize() == 8); + _emitInstruction(INST_FISTP, &dst); + } + + //! @brief Push 32-bit, 64-bit or 80-bit Floating Point Value onto the FPU + //! register stack (FPU). + inline void fld(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 4 || src.getSize() == 8 || src.getSize() == 10); + _emitInstruction(INST_FLD, &src); + } + + //! @brief Push @a reg onto the FPU register stack (FPU). + inline void fld(const X87Reg& reg) + { + _emitInstruction(INST_FLD, ®); + } + + //! @brief Push +1.0 onto the FPU register stack (FPU). + inline void fld1() + { + _emitInstruction(INST_FLD1); + } + + //! @brief Push log2(10) onto the FPU register stack (FPU). + inline void fldl2t() + { + _emitInstruction(INST_FLDL2T); + } + + //! @brief Push log2(e) onto the FPU register stack (FPU). + inline void fldl2e() + { + _emitInstruction(INST_FLDL2E); + } + + //! @brief Push pi onto the FPU register stack (FPU). + inline void fldpi() + { + _emitInstruction(INST_FLDPI); + } + + //! @brief Push log10(2) onto the FPU register stack (FPU). + inline void fldlg2() + { + _emitInstruction(INST_FLDLG2); + } + + //! @brief Push ln(2) onto the FPU register stack (FPU). + inline void fldln2() + { + _emitInstruction(INST_FLDLN2); + } + + //! @brief Push +0.0 onto the FPU register stack (FPU). + inline void fldz() + { + _emitInstruction(INST_FLDZ); + } + + //! 
@brief Load x87 FPU Control Word (2 bytes) (FPU). + inline void fldcw(const Mem& src) + { + _emitInstruction(INST_FLDCW, &src); + } + + //! @brief Load x87 FPU Environment (14 or 28 bytes) (FPU). + inline void fldenv(const Mem& src) + { + _emitInstruction(INST_FLDENV, &src); + } + + //! @brief Multiply @a dst by @a src and store result in @a dst (FPU). + //! + //! @note One of dst or src must be st(0). + inline void fmul(const X87Reg& dst, const X87Reg& src) + { + ASMJIT_ASSERT(dst.getRegIndex() == 0 || src.getRegIndex() == 0); + _emitInstruction(INST_FMUL, &dst, &src); + } + //! @brief Multiply st(0) by @a src and store result in st(0) (FPU). + //! + //! @note SP-FP or DP-FP determined by @a adr size. + inline void fmul(const Mem& src) + { + _emitInstruction(INST_FMUL, &src); + } + + //! @brief Multiply st(0) by @a dst and POP register stack (FPU). + inline void fmulp(const X87Reg& dst = st(1)) + { + _emitInstruction(INST_FMULP, &dst); + } + + //! @brief Clear Exceptions (FPU). + //! + //! Clear floating-point exception flags without checking for pending + //! unmasked floating-point exceptions. + //! + //! Clears the floating-point exception flags (PE, UE, OE, ZE, DE, and IE), + //! the exception summary status flag (ES), the stack fault flag (SF), and + //! the busy flag (B) in the FPU status word. The FCLEX instruction does + //! not checks for and handles any pending unmasked floating-point exceptions + //! before clearing the exception flags. + inline void fnclex() + { + _emitInstruction(INST_FNCLEX); + } + + //! @brief No Operation (FPU). + inline void fnop() + { + _emitInstruction(INST_FNOP); + } + + //! @brief Save FPU State (FPU). + //! + //! Store FPU environment to m94byte or m108byte without + //! checking for pending unmasked FP exceptions. + //! Then re-initialize the FPU. + inline void fnsave(const Mem& dst) + { + _emitInstruction(INST_FNSAVE, &dst); + } + + //! @brief Store x87 FPU Environment (FPU). + //! + //! 
Store FPU environment to @a dst (14 or 28 Bytes) without checking for + //! pending unmasked floating-point exceptions. Then mask all floating + //! point exceptions. + inline void fnstenv(const Mem& dst) + { + _emitInstruction(INST_FNSTENV, &dst); + } + + //! @brief Store x87 FPU Control Word (FPU). + //! + //! Store FPU control word to @a dst (2 Bytes) without checking for pending + //! unmasked floating-point exceptions. + inline void fnstcw(const Mem& dst) + { + _emitInstruction(INST_FNSTCW, &dst); + } + + //! @brief Store x87 FPU Status Word (2 Bytes) (FPU). + inline void fnstsw(const GPReg& dst) + { + ASMJIT_ASSERT(dst.isRegCode(REG_AX)); + _emitInstruction(INST_FNSTSW, &dst); + } + //! @brief Store x87 FPU Status Word (2 Bytes) (FPU). + inline void fnstsw(const Mem& dst) + { + _emitInstruction(INST_FNSTSW, &dst); + } + + //! @brief Partial Arctangent (FPU). + //! + //! Replace st(1) with arctan(st(1)/st(0)) and pop the register stack. + inline void fpatan() + { + _emitInstruction(INST_FPATAN); + } + + //! @brief Partial Remainder (FPU). + //! + //! Replace st(0) with the remainder obtained from dividing st(0) by st(1). + inline void fprem() + { + _emitInstruction(INST_FPREM); + } + + //! @brief Partial Remainder (FPU). + //! + //! Replace st(0) with the IEEE remainder obtained from dividing st(0) by + //! st(1). + inline void fprem1() + { + _emitInstruction(INST_FPREM1); + } + + //! @brief Partial Tangent (FPU). + //! + //! Replace st(0) with its tangent and push 1 onto the FPU stack. + inline void fptan() + { + _emitInstruction(INST_FPTAN); + } + + //! @brief Round to Integer (FPU). + //! + //! Rount st(0) to an Integer. + inline void frndint() + { + _emitInstruction(INST_FRNDINT); + } + + //! @brief Restore FPU State (FPU). + //! + //! Load FPU state from src (94 or 108 bytes). + inline void frstor(const Mem& src) + { + _emitInstruction(INST_FRSTOR, &src); + } + + //! @brief Save FPU State (FPU). + //! + //! 
Store FPU state to 94 or 108-bytes after checking for + //! pending unmasked FP exceptions. Then reinitialize + //! the FPU. + inline void fsave(const Mem& dst) + { + _emitInstruction(INST_FSAVE, &dst); + } + + //! @brief Scale (FPU). + //! + //! Scale st(0) by st(1). + inline void fscale() + { + _emitInstruction(INST_FSCALE); + } + + //! @brief Sine (FPU). + //! + //! This instruction calculates the sine of the source operand in + //! register st(0) and stores the result in st(0). + inline void fsin() + { + _emitInstruction(INST_FSIN); + } + + //! @brief Sine and Cosine (FPU). + //! + //! Compute the sine and cosine of st(0); replace st(0) with + //! the sine, and push the cosine onto the register stack. + inline void fsincos() + { + _emitInstruction(INST_FSINCOS); + } + + //! @brief Square Root (FPU). + //! + //! Calculates square root of st(0) and stores the result in st(0). + inline void fsqrt() + { + _emitInstruction(INST_FSQRT); + } + + //! @brief Store Floating Point Value (FPU). + //! + //! Store st(0) as 32-bit or 64-bit floating point value to @a dst. + inline void fst(const Mem& dst) + { + ASMJIT_ASSERT(dst.getSize() == 4 || dst.getSize() == 8); + _emitInstruction(INST_FST, &dst); + } + + //! @brief Store Floating Point Value (FPU). + //! + //! Store st(0) to @a reg. + inline void fst(const X87Reg& reg) + { + _emitInstruction(INST_FST, ®); + } + + //! @brief Store Floating Point Value and Pop Register Stack (FPU). + //! + //! Store st(0) as 32-bit or 64-bit floating point value to @a dst + //! and pop register stack. + inline void fstp(const Mem& dst) + { + ASMJIT_ASSERT(dst.getSize() == 4 || dst.getSize() == 8 || dst.getSize() == 10); + _emitInstruction(INST_FSTP, &dst); + } + + //! @brief Store Floating Point Value and Pop Register Stack (FPU). + //! + //! Store st(0) to @a reg and pop register stack. + inline void fstp(const X87Reg& reg) + { + _emitInstruction(INST_FSTP, ®); + } + + //! @brief Store x87 FPU Control Word (FPU). + //! + //! 
Store FPU control word to @a dst (2 Bytes) after checking for pending + //! unmasked floating-point exceptions. + inline void fstcw(const Mem& dst) + { + _emitInstruction(INST_FSTCW, &dst); + } + + //! @brief Store x87 FPU Environment (FPU). + //! + //! Store FPU environment to @a dst (14 or 28 Bytes) after checking for + //! pending unmasked floating-point exceptions. Then mask all floating + //! point exceptions. + inline void fstenv(const Mem& dst) + { + _emitInstruction(INST_FSTENV, &dst); + } + + //! @brief Store x87 FPU Status Word (2 Bytes) (FPU). + inline void fstsw(const GPReg& dst) + { + ASMJIT_ASSERT(dst.isRegCode(REG_AX)); + _emitInstruction(INST_FSTSW, &dst); + } + //! @brief Store x87 FPU Status Word (2 Bytes) (FPU). + inline void fstsw(const Mem& dst) + { + _emitInstruction(INST_FSTSW, &dst); + } + + //! @brief Subtract @a src from @a dst and store result in @a dst (FPU). + //! + //! @note One of dst or src must be st(0). + inline void fsub(const X87Reg& dst, const X87Reg& src) + { + ASMJIT_ASSERT(dst.getRegIndex() == 0 || src.getRegIndex() == 0); + _emitInstruction(INST_FSUB, &dst, &src); + } + //! @brief Subtract @a src from st(0) and store result in st(0) (FPU). + //! + //! @note SP-FP or DP-FP determined by @a adr size. + inline void fsub(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 4 || src.getSize() == 8); + _emitInstruction(INST_FSUB, &src); + } + + //! @brief Subtract st(0) from @a dst and POP register stack (FPU). + inline void fsubp(const X87Reg& dst = st(1)) + { + _emitInstruction(INST_FSUBP, &dst); + } + + //! @brief Reverse Subtract @a src from @a dst and store result in @a dst (FPU). + //! + //! @note One of dst or src must be st(0). + inline void fsubr(const X87Reg& dst, const X87Reg& src) + { + ASMJIT_ASSERT(dst.getRegIndex() == 0 || src.getRegIndex() == 0); + _emitInstruction(INST_FSUBR, &dst, &src); + } + + //! @brief Reverse Subtract @a src from st(0) and store result in st(0) (FPU). + //! + //! 
@note SP-FP or DP-FP determined by @a adr size. + inline void fsubr(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 4 || src.getSize() == 8); + _emitInstruction(INST_FSUBR, &src); + } + + //! @brief Reverse Subtract st(0) from @a dst and POP register stack (FPU). + inline void fsubrp(const X87Reg& dst = st(1)) + { + _emitInstruction(INST_FSUBRP, &dst); + } + + //! @brief Floating point test - Compare st(0) with 0.0. (FPU). + inline void ftst() + { + _emitInstruction(INST_FTST); + } + + //! @brief Unordered Compare st(0) with @a reg (FPU). + inline void fucom(const X87Reg& reg = st(1)) + { + _emitInstruction(INST_FUCOM, ®); + } + + //! @brief Unordered Compare st(0) and @a reg, check for ordered values + //! and Set EFLAGS (FPU). + inline void fucomi(const X87Reg& reg) + { + _emitInstruction(INST_FUCOMI, ®); + } + + //! @brief UnorderedCompare st(0) and @a reg, Check for ordered values + //! and Set EFLAGS and pop the stack (FPU). + inline void fucomip(const X87Reg& reg = st(1)) + { + _emitInstruction(INST_FUCOMIP, ®); + } + + //! @brief Unordered Compare st(0) with @a reg and pop register stack (FPU). + inline void fucomp(const X87Reg& reg = st(1)) + { + _emitInstruction(INST_FUCOMP, ®); + } + + //! @brief Unordered compare st(0) with st(1) and pop register stack twice + //! (FPU). + inline void fucompp() + { + _emitInstruction(INST_FUCOMPP); + } + + inline void fwait() + { + _emitInstruction(INST_FWAIT); + } + + //! @brief Examine st(0) (FPU). + //! + //! Examines the contents of the ST(0) register and sets the condition code + //! flags C0, C2, and C3 in the FPU status word to indicate the class of + //! value or number in the register. + inline void fxam() + { + _emitInstruction(INST_FXAM); + } + + //! @brief Exchange Register Contents (FPU). + //! + //! Exchange content of st(0) with @a reg. + inline void fxch(const X87Reg& reg = st(1)) + { + _emitInstruction(INST_FXCH, ®); + } + + //! 
@brief Restore FP And MMX(tm) State And Streaming SIMD Extension State + //! (FPU, MMX, SSE). + //! + //! Load FP and MMX(tm) technology and Streaming SIMD Extension state from + //! src (512 bytes). + inline void fxrstor(const Mem& src) + { + _emitInstruction(INST_FXRSTOR, &src); + } + + //! @brief Store FP and MMX(tm) State and Streaming SIMD Extension State + //! (FPU, MMX, SSE). + //! + //! Store FP and MMX(tm) technology state and Streaming SIMD Extension state + //! to dst (512 bytes). + inline void fxsave(const Mem& dst) + { + _emitInstruction(INST_FXSAVE, &dst); + } + + //! @brief Extract Exponent and Significand (FPU). + //! + //! Separate value in st(0) into exponent and significand, store exponent + //! in st(0), and push the significand onto the register stack. + inline void fxtract() + { + _emitInstruction(INST_FXTRACT); + } + + //! @brief Compute y * log2(x). + //! + //! Replace st(1) with (st(1) * log2st(0)) and pop the register stack. + inline void fyl2x() + { + _emitInstruction(INST_FYL2X); + } + + //! @brief Compute y * log_2(x+1). + //! + //! Replace st(1) with (st(1) * (log2st(0) + 1.0)) and pop the register stack. + inline void fyl2xp1() + { + _emitInstruction(INST_FYL2XP1); + } + + // -------------------------------------------------------------------------- + // [MMX] + // -------------------------------------------------------------------------- + + //! @brief Empty MMX state. + inline void emms() + { + _emitInstruction(INST_EMMS); + } + + //! @brief Move DWord (MMX). + inline void movd(const Mem& dst, const MMReg& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + //! @brief Move DWord (MMX). + inline void movd(const GPReg& dst, const MMReg& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + //! @brief Move DWord (MMX). + inline void movd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + //! @brief Move DWord (MMX). 
+ inline void movd(const MMReg& dst, const GPReg& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + + //! @brief Move QWord (MMX). + inline void movq(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } + //! @brief Move QWord (MMX). + inline void movq(const Mem& dst, const MMReg& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#if defined(ASMJIT_X64) + //! @brief Move QWord (MMX). + inline void movq(const GPReg& dst, const MMReg& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#endif + //! @brief Move QWord (MMX). + inline void movq(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#if defined(ASMJIT_X64) + //! @brief Move QWord (MMX). + inline void movq(const MMReg& dst, const GPReg& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#endif + + //! @brief Pack with Unsigned Saturation (MMX). + inline void packuswb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PACKUSWB, &dst, &src); + } + //! @brief Pack with Unsigned Saturation (MMX). + inline void packuswb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PACKUSWB, &dst, &src); + } + + //! @brief Packed BYTE Add (MMX). + inline void paddb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PADDB, &dst, &src); + } + //! @brief Packed BYTE Add (MMX). + inline void paddb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDB, &dst, &src); + } + + //! @brief Packed WORD Add (MMX). + inline void paddw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PADDW, &dst, &src); + } + //! @brief Packed WORD Add (MMX). + inline void paddw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDW, &dst, &src); + } + + //! @brief Packed DWORD Add (MMX). + inline void paddd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PADDD, &dst, &src); + } + //! @brief Packed DWORD Add (MMX). 
+ inline void paddd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDD, &dst, &src); + } + + //! @brief Packed Add with Saturation (MMX). + inline void paddsb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PADDSB, &dst, &src); + } + //! @brief Packed Add with Saturation (MMX). + inline void paddsb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDSB, &dst, &src); + } + + //! @brief Packed Add with Saturation (MMX). + inline void paddsw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PADDSW, &dst, &src); + } + //! @brief Packed Add with Saturation (MMX). + inline void paddsw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDSW, &dst, &src); + } + + //! @brief Packed Add Unsigned with Saturation (MMX). + inline void paddusb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PADDUSB, &dst, &src); + } + //! @brief Packed Add Unsigned with Saturation (MMX). + inline void paddusb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDUSB, &dst, &src); + } + + //! @brief Packed Add Unsigned with Saturation (MMX). + inline void paddusw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PADDUSW, &dst, &src); + } + //! @brief Packed Add Unsigned with Saturation (MMX). + inline void paddusw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDUSW, &dst, &src); + } + + //! @brief Logical AND (MMX). + inline void pand(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PAND, &dst, &src); + } + //! @brief Logical AND (MMX). + inline void pand(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PAND, &dst, &src); + } + + //! @brief Logical AND Not (MMX). + inline void pandn(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PANDN, &dst, &src); + } + //! @brief Logical AND Not (MMX). + inline void pandn(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PANDN, &dst, &src); + } + + //! 
@brief Packed Compare for Equal (BYTES) (MMX). + inline void pcmpeqb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PCMPEQB, &dst, &src); + } + //! @brief Packed Compare for Equal (BYTES) (MMX). + inline void pcmpeqb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPEQB, &dst, &src); + } + + //! @brief Packed Compare for Equal (WORDS) (MMX). + inline void pcmpeqw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PCMPEQW, &dst, &src); + } + //! @brief Packed Compare for Equal (WORDS) (MMX). + inline void pcmpeqw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPEQW, &dst, &src); + } + + //! @brief Packed Compare for Equal (DWORDS) (MMX). + inline void pcmpeqd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PCMPEQD, &dst, &src); + } + //! @brief Packed Compare for Equal (DWORDS) (MMX). + inline void pcmpeqd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPEQD, &dst, &src); + } + + //! @brief Packed Compare for Greater Than (BYTES) (MMX). + inline void pcmpgtb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PCMPGTB, &dst, &src); + } + //! @brief Packed Compare for Greater Than (BYTES) (MMX). + inline void pcmpgtb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPGTB, &dst, &src); + } + + //! @brief Packed Compare for Greater Than (WORDS) (MMX). + inline void pcmpgtw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PCMPGTW, &dst, &src); + } + //! @brief Packed Compare for Greater Than (WORDS) (MMX). + inline void pcmpgtw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPGTW, &dst, &src); + } + + //! @brief Packed Compare for Greater Than (DWORDS) (MMX). + inline void pcmpgtd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PCMPGTD, &dst, &src); + } + //! @brief Packed Compare for Greater Than (DWORDS) (MMX). 
+ inline void pcmpgtd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPGTD, &dst, &src); + } + + //! @brief Packed Multiply High (MMX). + inline void pmulhw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PMULHW, &dst, &src); + } + //! @brief Packed Multiply High (MMX). + inline void pmulhw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMULHW, &dst, &src); + } + + //! @brief Packed Multiply Low (MMX). + inline void pmullw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PMULLW, &dst, &src); + } + //! @brief Packed Multiply Low (MMX). + inline void pmullw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMULLW, &dst, &src); + } + + //! @brief Bitwise Logical OR (MMX). + inline void por(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_POR, &dst, &src); + } + //! @brief Bitwise Logical OR (MMX). + inline void por(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_POR, &dst, &src); + } + + //! @brief Packed Multiply and Add (MMX). + inline void pmaddwd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PMADDWD, &dst, &src); + } + //! @brief Packed Multiply and Add (MMX). + inline void pmaddwd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMADDWD, &dst, &src); + } + + //! @brief Packed Shift Left Logical (MMX). + inline void pslld(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSLLD, &dst, &src); + } + //! @brief Packed Shift Left Logical (MMX). + inline void pslld(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSLLD, &dst, &src); + } + //! @brief Packed Shift Left Logical (MMX). + inline void pslld(const MMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSLLD, &dst, &src); + } + + //! @brief Packed Shift Left Logical (MMX). + inline void psllq(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSLLQ, &dst, &src); + } + //! @brief Packed Shift Left Logical (MMX). 
+ inline void psllq(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSLLQ, &dst, &src); + } + //! @brief Packed Shift Left Logical (MMX). + inline void psllq(const MMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSLLQ, &dst, &src); + } + + //! @brief Packed Shift Left Logical (MMX). + inline void psllw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSLLW, &dst, &src); + } + //! @brief Packed Shift Left Logical (MMX). + inline void psllw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSLLW, &dst, &src); + } + //! @brief Packed Shift Left Logical (MMX). + inline void psllw(const MMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSLLW, &dst, &src); + } + + //! @brief Packed Shift Right Arithmetic (MMX). + inline void psrad(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSRAD, &dst, &src); + } + //! @brief Packed Shift Right Arithmetic (MMX). + inline void psrad(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSRAD, &dst, &src); + } + //! @brief Packed Shift Right Arithmetic (MMX). + inline void psrad(const MMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSRAD, &dst, &src); + } + + //! @brief Packed Shift Right Arithmetic (MMX). + inline void psraw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSRAW, &dst, &src); + } + //! @brief Packed Shift Right Arithmetic (MMX). + inline void psraw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSRAW, &dst, &src); + } + //! @brief Packed Shift Right Arithmetic (MMX). + inline void psraw(const MMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSRAW, &dst, &src); + } + + //! @brief Packed Shift Right Logical (MMX). + inline void psrld(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSRLD, &dst, &src); + } + //! @brief Packed Shift Right Logical (MMX). + inline void psrld(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSRLD, &dst, &src); + } + //! 
@brief Packed Shift Right Logical (MMX). + inline void psrld(const MMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSRLD, &dst, &src); + } + + //! @brief Packed Shift Right Logical (MMX). + inline void psrlq(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSRLQ, &dst, &src); + } + //! @brief Packed Shift Right Logical (MMX). + inline void psrlq(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSRLQ, &dst, &src); + } + //! @brief Packed Shift Right Logical (MMX). + inline void psrlq(const MMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSRLQ, &dst, &src); + } + + //! @brief Packed Shift Right Logical (MMX). + inline void psrlw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSRLW, &dst, &src); + } + //! @brief Packed Shift Right Logical (MMX). + inline void psrlw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSRLW, &dst, &src); + } + //! @brief Packed Shift Right Logical (MMX). + inline void psrlw(const MMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSRLW, &dst, &src); + } + + //! @brief Packed Subtract (MMX). + inline void psubb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSUBB, &dst, &src); + } + //! @brief Packed Subtract (MMX). + inline void psubb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBB, &dst, &src); + } + + //! @brief Packed Subtract (MMX). + inline void psubw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSUBW, &dst, &src); + } + //! @brief Packed Subtract (MMX). + inline void psubw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBW, &dst, &src); + } + + //! @brief Packed Subtract (MMX). + inline void psubd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSUBD, &dst, &src); + } + //! @brief Packed Subtract (MMX). + inline void psubd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBD, &dst, &src); + } + + //! @brief Packed Subtract with Saturation (MMX). 
+ inline void psubsb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSUBSB, &dst, &src); + } + //! @brief Packed Subtract with Saturation (MMX). + inline void psubsb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBSB, &dst, &src); + } + + //! @brief Packed Subtract with Saturation (MMX). + inline void psubsw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSUBSW, &dst, &src); + } + //! @brief Packed Subtract with Saturation (MMX). + inline void psubsw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBSW, &dst, &src); + } + + //! @brief Packed Subtract with Unsigned Saturation (MMX). + inline void psubusb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSUBUSB, &dst, &src); + } + //! @brief Packed Subtract with Unsigned Saturation (MMX). + inline void psubusb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBUSB, &dst, &src); + } + + //! @brief Packed Subtract with Unsigned Saturation (MMX). + inline void psubusw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSUBUSW, &dst, &src); + } + //! @brief Packed Subtract with Unsigned Saturation (MMX). + inline void psubusw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBUSW, &dst, &src); + } + + //! @brief Unpack High Packed Data (MMX). + inline void punpckhbw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PUNPCKHBW, &dst, &src); + } + //! @brief Unpack High Packed Data (MMX). + inline void punpckhbw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKHBW, &dst, &src); + } + + //! @brief Unpack High Packed Data (MMX). + inline void punpckhwd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PUNPCKHWD, &dst, &src); + } + //! @brief Unpack High Packed Data (MMX). + inline void punpckhwd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKHWD, &dst, &src); + } + + //! @brief Unpack High Packed Data (MMX). 
+ inline void punpckhdq(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PUNPCKHDQ, &dst, &src); + } + //! @brief Unpack High Packed Data (MMX). + inline void punpckhdq(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKHDQ, &dst, &src); + } + + //! @brief Unpack High Packed Data (MMX). + inline void punpcklbw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PUNPCKLBW, &dst, &src); + } + //! @brief Unpack High Packed Data (MMX). + inline void punpcklbw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKLBW, &dst, &src); + } + + //! @brief Unpack High Packed Data (MMX). + inline void punpcklwd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PUNPCKLWD, &dst, &src); + } + //! @brief Unpack High Packed Data (MMX). + inline void punpcklwd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKLWD, &dst, &src); + } + + //! @brief Unpack High Packed Data (MMX). + inline void punpckldq(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PUNPCKLDQ, &dst, &src); + } + //! @brief Unpack High Packed Data (MMX). + inline void punpckldq(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKLDQ, &dst, &src); + } + + //! @brief Bitwise Exclusive OR (MMX). + inline void pxor(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PXOR, &dst, &src); + } + //! @brief Bitwise Exclusive OR (MMX). + inline void pxor(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PXOR, &dst, &src); + } + + // ------------------------------------------------------------------------- + // [3dNow] + // ------------------------------------------------------------------------- + + //! @brief Faster EMMS (3dNow!). + //! + //! @note Use only for early AMD processors where is only 3dNow! or SSE. If + //! CPU contains SSE2, it's better to use @c emms() ( @c femms() is mapped + //! to @c emms() ). + inline void femms() + { + _emitInstruction(INST_FEMMS); + } + + //! 
@brief Packed SP-FP to Integer Convert (3dNow!). + inline void pf2id(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PF2ID, &dst, &src); + } + //! @brief Packed SP-FP to Integer Convert (3dNow!). + inline void pf2id(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PF2ID, &dst, &src); + } + + //! @brief Packed SP-FP to Integer Word Convert (3dNow!). + inline void pf2iw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PF2IW, &dst, &src); + } + //! @brief Packed SP-FP to Integer Word Convert (3dNow!). + inline void pf2iw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PF2IW, &dst, &src); + } + + //! @brief Packed SP-FP Accumulate (3dNow!). + inline void pfacc(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PFACC, &dst, &src); + } + //! @brief Packed SP-FP Accumulate (3dNow!). + inline void pfacc(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PFACC, &dst, &src); + } + + //! @brief Packed SP-FP Addition (3dNow!). + inline void pfadd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PFADD, &dst, &src); + } + //! @brief Packed SP-FP Addition (3dNow!). + inline void pfadd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PFADD, &dst, &src); + } + + //! @brief Packed SP-FP Compare - dst == src (3dNow!). + inline void pfcmpeq(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PFCMPEQ, &dst, &src); + } + //! @brief Packed SP-FP Compare - dst == src (3dNow!). + inline void pfcmpeq(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PFCMPEQ, &dst, &src); + } + + //! @brief Packed SP-FP Compare - dst >= src (3dNow!). + inline void pfcmpge(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PFCMPGE, &dst, &src); + } + //! @brief Packed SP-FP Compare - dst >= src (3dNow!). + inline void pfcmpge(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PFCMPGE, &dst, &src); + } + + //! 
@brief Packed SP-FP Compare - dst > src (3dNow!).
+  inline void pfcmpgt(const MMReg& dst, const MMReg& src)
+  {
+    _emitInstruction(INST_PFCMPGT, &dst, &src);
+  }
+  //! @brief Packed SP-FP Compare - dst > src (3dNow!).
+  inline void pfcmpgt(const MMReg& dst, const Mem& src)
+  {
+    _emitInstruction(INST_PFCMPGT, &dst, &src);
+  }
+
+  //! @brief Packed SP-FP Maximum (3dNow!).
+  inline void pfmax(const MMReg& dst, const MMReg& src)
+  {
+    _emitInstruction(INST_PFMAX, &dst, &src);
+  }
+  //! @brief Packed SP-FP Maximum (3dNow!).
+  inline void pfmax(const MMReg& dst, const Mem& src)
+  {
+    _emitInstruction(INST_PFMAX, &dst, &src);
+  }
+
+  //! @brief Packed SP-FP Minimum (3dNow!).
+  inline void pfmin(const MMReg& dst, const MMReg& src)
+  {
+    _emitInstruction(INST_PFMIN, &dst, &src);
+  }
+  //! @brief Packed SP-FP Minimum (3dNow!).
+  inline void pfmin(const MMReg& dst, const Mem& src)
+  {
+    _emitInstruction(INST_PFMIN, &dst, &src);
+  }
+
+  //! @brief Packed SP-FP Multiply (3dNow!).
+  inline void pfmul(const MMReg& dst, const MMReg& src)
+  {
+    _emitInstruction(INST_PFMUL, &dst, &src);
+  }
+  //! @brief Packed SP-FP Multiply (3dNow!).
+  inline void pfmul(const MMReg& dst, const Mem& src)
+  {
+    _emitInstruction(INST_PFMUL, &dst, &src);
+  }
+
+  //! @brief Packed SP-FP Negative Accumulate (3dNow!).
+  inline void pfnacc(const MMReg& dst, const MMReg& src)
+  {
+    _emitInstruction(INST_PFNACC, &dst, &src);
+  }
+  //! @brief Packed SP-FP Negative Accumulate (3dNow!).
+  inline void pfnacc(const MMReg& dst, const Mem& src)
+  {
+    _emitInstruction(INST_PFNACC, &dst, &src);
+  }
+
+  //! @brief Packed SP-FP Mixed Accumulate (3dNow!).
+  inline void pfpnacc(const MMReg& dst, const MMReg& src)
+  {
+    _emitInstruction(INST_PFPNACC, &dst, &src);
+  }
+  //! @brief Packed SP-FP Mixed Accumulate (3dNow!).
+  //!
+  //! @deprecated Misspelled alias of @c pfpnacc(), kept so existing callers
+  //! keep compiling; new code should call @c pfpnacc().
+  inline void pfpnaxx(const MMReg& dst, const MMReg& src)
+  {
+    pfpnacc(dst, src);
+  }
+  //! @brief Packed SP-FP Mixed Accumulate (3dNow!).
+  inline void pfpnacc(const MMReg& dst, const Mem& src)
+  {
+    _emitInstruction(INST_PFPNACC, &dst, &src);
+  }
+
+  //! @brief Packed SP-FP Reciprocal Approximation (3dNow!).
+ inline void pfrcp(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PFRCP, &dst, &src); + } + //! @brief Packed SP-FP Reciprocal Approximation (3dNow!). + inline void pfrcp(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PFRCP, &dst, &src); + } + + //! @brief Packed SP-FP Reciprocal, First Iteration Step (3dNow!). + inline void pfrcpit1(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PFRCPIT1, &dst, &src); + } + //! @brief Packed SP-FP Reciprocal, First Iteration Step (3dNow!). + inline void pfrcpit1(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PFRCPIT1, &dst, &src); + } + + //! @brief Packed SP-FP Reciprocal, Second Iteration Step (3dNow!). + inline void pfrcpit2(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PFRCPIT2, &dst, &src); + } + //! @brief Packed SP-FP Reciprocal, Second Iteration Step (3dNow!). + inline void pfrcpit2(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PFRCPIT2, &dst, &src); + } + + //! @brief Packed SP-FP Reciprocal Square Root, First Iteration Step (3dNow!). + inline void pfrsqit1(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PFRSQIT1, &dst, &src); + } + //! @brief Packed SP-FP Reciprocal Square Root, First Iteration Step (3dNow!). + inline void pfrsqit1(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PFRSQIT1, &dst, &src); + } + + //! @brief Packed SP-FP Reciprocal Square Root Approximation (3dNow!). + inline void pfrsqrt(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PFRSQRT, &dst, &src); + } + //! @brief Packed SP-FP Reciprocal Square Root Approximation (3dNow!). + inline void pfrsqrt(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PFRSQRT, &dst, &src); + } + + //! @brief Packed SP-FP Subtract (3dNow!). + inline void pfsub(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PFSUB, &dst, &src); + } + //! @brief Packed SP-FP Subtract (3dNow!). 
+ inline void pfsub(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PFSUB, &dst, &src); + } + + //! @brief Packed SP-FP Reverse Subtract (3dNow!). + inline void pfsubr(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PFSUBR, &dst, &src); + } + //! @brief Packed SP-FP Reverse Subtract (3dNow!). + inline void pfsubr(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PFSUBR, &dst, &src); + } + + //! @brief Packed DWords to SP-FP (3dNow!). + inline void pi2fd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PI2FD, &dst, &src); + } + //! @brief Packed DWords to SP-FP (3dNow!). + inline void pi2fd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PI2FD, &dst, &src); + } + + //! @brief Packed Words to SP-FP (3dNow!). + inline void pi2fw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PI2FW, &dst, &src); + } + //! @brief Packed Words to SP-FP (3dNow!). + inline void pi2fw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PI2FW, &dst, &src); + } + + //! @brief Packed swap DWord (3dNow!) + inline void pswapd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSWAPD, &dst, &src); + } + //! @brief Packed swap DWord (3dNow!) + inline void pswapd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSWAPD, &dst, &src); + } + + // -------------------------------------------------------------------------- + // [SSE] + // -------------------------------------------------------------------------- + + //! @brief Packed SP-FP Add (SSE). + inline void addps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_ADDPS, &dst, &src); + } + //! @brief Packed SP-FP Add (SSE). + inline void addps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_ADDPS, &dst, &src); + } + + //! @brief Scalar SP-FP Add (SSE). + inline void addss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_ADDSS, &dst, &src); + } + //! @brief Scalar SP-FP Add (SSE). 
+ inline void addss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_ADDSS, &dst, &src); + } + + //! @brief Bit-wise Logical And Not For SP-FP (SSE). + inline void andnps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_ANDNPS, &dst, &src); + } + //! @brief Bit-wise Logical And Not For SP-FP (SSE). + inline void andnps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_ANDNPS, &dst, &src); + } + + //! @brief Bit-wise Logical And For SP-FP (SSE). + inline void andps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_ANDPS, &dst, &src); + } + //! @brief Bit-wise Logical And For SP-FP (SSE). + inline void andps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_ANDPS, &dst, &src); + } + + //! @brief Packed SP-FP Compare (SSE). + inline void cmpps(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_CMPPS, &dst, &src, &imm8); + } + //! @brief Packed SP-FP Compare (SSE). + inline void cmpps(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_CMPPS, &dst, &src, &imm8); + } + + //! @brief Compare Scalar SP-FP Values (SSE). + inline void cmpss(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_CMPSS, &dst, &src, &imm8); + } + //! @brief Compare Scalar SP-FP Values (SSE). + inline void cmpss(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_CMPSS, &dst, &src, &imm8); + } + + //! @brief Scalar Ordered SP-FP Compare and Set EFLAGS (SSE). + inline void comiss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_COMISS, &dst, &src); + } + //! @brief Scalar Ordered SP-FP Compare and Set EFLAGS (SSE). + inline void comiss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_COMISS, &dst, &src); + } + + //! @brief Packed Signed INT32 to Packed SP-FP Conversion (SSE). 
+ inline void cvtpi2ps(const XMMReg& dst, const MMReg& src) + { + _emitInstruction(INST_CVTPI2PS, &dst, &src); + } + //! @brief Packed Signed INT32 to Packed SP-FP Conversion (SSE). + inline void cvtpi2ps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTPI2PS, &dst, &src); + } + + //! @brief Packed SP-FP to Packed INT32 Conversion (SSE). + inline void cvtps2pi(const MMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTPS2PI, &dst, &src); + } + //! @brief Packed SP-FP to Packed INT32 Conversion (SSE). + inline void cvtps2pi(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTPS2PI, &dst, &src); + } + + //! @brief Scalar Signed INT32 to SP-FP Conversion (SSE). + inline void cvtsi2ss(const XMMReg& dst, const GPReg& src) + { + _emitInstruction(INST_CVTSI2SS, &dst, &src); + } + //! @brief Scalar Signed INT32 to SP-FP Conversion (SSE). + inline void cvtsi2ss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTSI2SS, &dst, &src); + } + + //! @brief Scalar SP-FP to Signed INT32 Conversion (SSE). + inline void cvtss2si(const GPReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTSS2SI, &dst, &src); + } + //! @brief Scalar SP-FP to Signed INT32 Conversion (SSE). + inline void cvtss2si(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTSS2SI, &dst, &src); + } + + //! @brief Packed SP-FP to Packed INT32 Conversion (truncate) (SSE). + inline void cvttps2pi(const MMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTTPS2PI, &dst, &src); + } + //! @brief Packed SP-FP to Packed INT32 Conversion (truncate) (SSE). + inline void cvttps2pi(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTTPS2PI, &dst, &src); + } + + //! @brief Scalar SP-FP to Signed INT32 Conversion (truncate) (SSE). + inline void cvttss2si(const GPReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTTSS2SI, &dst, &src); + } + //! @brief Scalar SP-FP to Signed INT32 Conversion (truncate) (SSE). 
+ inline void cvttss2si(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTTSS2SI, &dst, &src); + } + + //! @brief Packed SP-FP Divide (SSE). + inline void divps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_DIVPS, &dst, &src); + } + //! @brief Packed SP-FP Divide (SSE). + inline void divps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_DIVPS, &dst, &src); + } + + //! @brief Scalar SP-FP Divide (SSE). + inline void divss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_DIVSS, &dst, &src); + } + //! @brief Scalar SP-FP Divide (SSE). + inline void divss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_DIVSS, &dst, &src); + } + + //! @brief Load Streaming SIMD Extension Control/Status (SSE). + inline void ldmxcsr(const Mem& src) + { + _emitInstruction(INST_LDMXCSR, &src); + } + + //! @brief Byte Mask Write (SSE). + //! + //! @note The default memory location is specified by DS:EDI. + inline void maskmovq(const MMReg& data, const MMReg& mask) + { + _emitInstruction(INST_MASKMOVQ, &data, &mask); + } + + //! @brief Packed SP-FP Maximum (SSE). + inline void maxps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MAXPS, &dst, &src); + } + //! @brief Packed SP-FP Maximum (SSE). + inline void maxps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MAXPS, &dst, &src); + } + + //! @brief Scalar SP-FP Maximum (SSE). + inline void maxss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MAXSS, &dst, &src); + } + //! @brief Scalar SP-FP Maximum (SSE). + inline void maxss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MAXSS, &dst, &src); + } + + //! @brief Packed SP-FP Minimum (SSE). + inline void minps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MINPS, &dst, &src); + } + //! @brief Packed SP-FP Minimum (SSE). 
+ inline void minps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MINPS, &dst, &src); + } + + //! @brief Scalar SP-FP Minimum (SSE). + inline void minss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MINSS, &dst, &src); + } + //! @brief Scalar SP-FP Minimum (SSE). + inline void minss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MINSS, &dst, &src); + } + + //! @brief Move Aligned Packed SP-FP Values (SSE). + inline void movaps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVAPS, &dst, &src); + } + //! @brief Move Aligned Packed SP-FP Values (SSE). + inline void movaps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVAPS, &dst, &src); + } + + //! @brief Move Aligned Packed SP-FP Values (SSE). + inline void movaps(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVAPS, &dst, &src); + } + + //! @brief Move DWord. + inline void movd(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + //! @brief Move DWord. + inline void movd(const GPReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + //! @brief Move DWord. + inline void movd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + //! @brief Move DWord. + inline void movd(const XMMReg& dst, const GPReg& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + + //! @brief Move QWord (SSE). + inline void movq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } + //! @brief Move QWord (SSE). + inline void movq(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#if defined(ASMJIT_X64) + //! @brief Move QWord (SSE). + inline void movq(const GPReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#endif // ASMJIT_X64 + //! @brief Move QWord (SSE). 
+ inline void movq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#if defined(ASMJIT_X64) + //! @brief Move QWord (SSE). + inline void movq(const XMMReg& dst, const GPReg& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#endif // ASMJIT_X64 + + //! @brief Move 64 Bits Non Temporal (SSE). + inline void movntq(const Mem& dst, const MMReg& src) + { + _emitInstruction(INST_MOVNTQ, &dst, &src); + } + + //! @brief High to Low Packed SP-FP (SSE). + inline void movhlps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVHLPS, &dst, &src); + } + + //! @brief Move High Packed SP-FP (SSE). + inline void movhps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVHPS, &dst, &src); + } + + //! @brief Move High Packed SP-FP (SSE). + inline void movhps(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVHPS, &dst, &src); + } + + //! @brief Move Low to High Packed SP-FP (SSE). + inline void movlhps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVLHPS, &dst, &src); + } + + //! @brief Move Low Packed SP-FP (SSE). + inline void movlps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVLPS, &dst, &src); + } + + //! @brief Move Low Packed SP-FP (SSE). + inline void movlps(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVLPS, &dst, &src); + } + + //! @brief Move Aligned Four Packed SP-FP Non Temporal (SSE). + inline void movntps(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVNTPS, &dst, &src); + } + + //! @brief Move Scalar SP-FP (SSE). + inline void movss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVSS, &dst, &src); + } + + //! @brief Move Scalar SP-FP (SSE). + inline void movss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVSS, &dst, &src); + } + + //! @brief Move Scalar SP-FP (SSE). 
+ inline void movss(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVSS, &dst, &src); + } + + //! @brief Move Unaligned Packed SP-FP Values (SSE). + inline void movups(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVUPS, &dst, &src); + } + //! @brief Move Unaligned Packed SP-FP Values (SSE). + inline void movups(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVUPS, &dst, &src); + } + + //! @brief Move Unaligned Packed SP-FP Values (SSE). + inline void movups(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVUPS, &dst, &src); + } + + //! @brief Packed SP-FP Multiply (SSE). + inline void mulps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MULPS, &dst, &src); + } + //! @brief Packed SP-FP Multiply (SSE). + inline void mulps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MULPS, &dst, &src); + } + + //! @brief Scalar SP-FP Multiply (SSE). + inline void mulss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MULSS, &dst, &src); + } + //! @brief Scalar SP-FP Multiply (SSE). + inline void mulss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MULSS, &dst, &src); + } + + //! @brief Bit-wise Logical OR for SP-FP Data (SSE). + inline void orps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_ORPS, &dst, &src); + } + //! @brief Bit-wise Logical OR for SP-FP Data (SSE). + inline void orps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_ORPS, &dst, &src); + } + + //! @brief Packed Average (SSE). + inline void pavgb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PAVGB, &dst, &src); + } + //! @brief Packed Average (SSE). + inline void pavgb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PAVGB, &dst, &src); + } + + //! @brief Packed Average (SSE). + inline void pavgw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PAVGW, &dst, &src); + } + //! 
@brief Packed Average (SSE). + inline void pavgw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PAVGW, &dst, &src); + } + + //! @brief Extract Word (SSE). + inline void pextrw(const GPReg& dst, const MMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRW, &dst, &src, &imm8); + } + + //! @brief Insert Word (SSE). + inline void pinsrw(const MMReg& dst, const GPReg& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRW, &dst, &src, &imm8); + } + //! @brief Insert Word (SSE). + inline void pinsrw(const MMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRW, &dst, &src, &imm8); + } + + //! @brief Packed Signed Integer Word Maximum (SSE). + inline void pmaxsw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PMAXSW, &dst, &src); + } + //! @brief Packed Signed Integer Word Maximum (SSE). + inline void pmaxsw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMAXSW, &dst, &src); + } + + //! @brief Packed Unsigned Integer Byte Maximum (SSE). + inline void pmaxub(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PMAXUB, &dst, &src); + } + //! @brief Packed Unsigned Integer Byte Maximum (SSE). + inline void pmaxub(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMAXUB, &dst, &src); + } + + //! @brief Packed Signed Integer Word Minimum (SSE). + inline void pminsw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PMINSW, &dst, &src); + } + //! @brief Packed Signed Integer Word Minimum (SSE). + inline void pminsw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMINSW, &dst, &src); + } + + //! @brief Packed Unsigned Integer Byte Minimum (SSE). + inline void pminub(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PMINUB, &dst, &src); + } + //! @brief Packed Unsigned Integer Byte Minimum (SSE). + inline void pminub(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMINUB, &dst, &src); + } + + //! 
@brief Move Byte Mask To Integer (SSE). + inline void pmovmskb(const GPReg& dst, const MMReg& src) + { + _emitInstruction(INST_PMOVMSKB, &dst, &src); + } + + //! @brief Packed Multiply High Unsigned (SSE). + inline void pmulhuw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PMULHUW, &dst, &src); + } + //! @brief Packed Multiply High Unsigned (SSE). + inline void pmulhuw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMULHUW, &dst, &src); + } + + //! @brief Packed Sum of Absolute Differences (SSE). + inline void psadbw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSADBW, &dst, &src); + } + //! @brief Packed Sum of Absolute Differences (SSE). + inline void psadbw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSADBW, &dst, &src); + } + + //! @brief Packed Shuffle word (SSE). + inline void pshufw(const MMReg& dst, const MMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PSHUFW, &dst, &src, &imm8); + } + //! @brief Packed Shuffle word (SSE). + inline void pshufw(const MMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PSHUFW, &dst, &src, &imm8); + } + + //! @brief Packed SP-FP Reciprocal (SSE). + inline void rcpps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_RCPPS, &dst, &src); + } + //! @brief Packed SP-FP Reciprocal (SSE). + inline void rcpps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_RCPPS, &dst, &src); + } + + //! @brief Scalar SP-FP Reciprocal (SSE). + inline void rcpss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_RCPSS, &dst, &src); + } + //! @brief Scalar SP-FP Reciprocal (SSE). + inline void rcpss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_RCPSS, &dst, &src); + } + + //! @brief Prefetch (SSE). + inline void prefetch(const Mem& mem, const Imm& hint) + { + _emitInstruction(INST_PREFETCH, &mem, &hint); + } + + //! @brief Compute Sum of Absolute Differences (SSE). 
+ inline void psadbw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSADBW, &dst, &src); + } + //! @brief Compute Sum of Absolute Differences (SSE). + inline void psadbw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSADBW, &dst, &src); + } + + //! @brief Packed SP-FP Square Root Reciprocal (SSE). + inline void rsqrtps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_RSQRTPS, &dst, &src); + } + //! @brief Packed SP-FP Square Root Reciprocal (SSE). + inline void rsqrtps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_RSQRTPS, &dst, &src); + } + + //! @brief Scalar SP-FP Square Root Reciprocal (SSE). + inline void rsqrtss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_RSQRTSS, &dst, &src); + } + //! @brief Scalar SP-FP Square Root Reciprocal (SSE). + inline void rsqrtss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_RSQRTSS, &dst, &src); + } + + //! @brief Store fence (SSE). + inline void sfence() + { + _emitInstruction(INST_SFENCE); + } + + //! @brief Shuffle SP-FP (SSE). + inline void shufps(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_SHUFPS, &dst, &src, &imm8); + } + //! @brief Shuffle SP-FP (SSE). + inline void shufps(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_SHUFPS, &dst, &src, &imm8); + } + + //! @brief Packed SP-FP Square Root (SSE). + inline void sqrtps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_SQRTPS, &dst, &src); + } + //! @brief Packed SP-FP Square Root (SSE). + inline void sqrtps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_SQRTPS, &dst, &src); + } + + //! @brief Scalar SP-FP Square Root (SSE). + inline void sqrtss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_SQRTSS, &dst, &src); + } + //! @brief Scalar SP-FP Square Root (SSE). 
+ inline void sqrtss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_SQRTSS, &dst, &src); + } + + //! @brief Store Streaming SIMD Extension Control/Status (SSE). + inline void stmxcsr(const Mem& dst) + { + _emitInstruction(INST_STMXCSR, &dst); + } + + //! @brief Packed SP-FP Subtract (SSE). + inline void subps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_SUBPS, &dst, &src); + } + //! @brief Packed SP-FP Subtract (SSE). + inline void subps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_SUBPS, &dst, &src); + } + + //! @brief Scalar SP-FP Subtract (SSE). + inline void subss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_SUBSS, &dst, &src); + } + //! @brief Scalar SP-FP Subtract (SSE). + inline void subss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_SUBSS, &dst, &src); + } + + //! @brief Unordered Scalar SP-FP compare and set EFLAGS (SSE). + inline void ucomiss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_UCOMISS, &dst, &src); + } + //! @brief Unordered Scalar SP-FP compare and set EFLAGS (SSE). + inline void ucomiss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_UCOMISS, &dst, &src); + } + + //! @brief Unpack High Packed SP-FP Data (SSE). + inline void unpckhps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_UNPCKHPS, &dst, &src); + } + //! @brief Unpack High Packed SP-FP Data (SSE). + inline void unpckhps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_UNPCKHPS, &dst, &src); + } + + //! @brief Unpack Low Packed SP-FP Data (SSE). + inline void unpcklps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_UNPCKLPS, &dst, &src); + } + //! @brief Unpack Low Packed SP-FP Data (SSE). + inline void unpcklps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_UNPCKLPS, &dst, &src); + } + + //! @brief Bit-wise Logical Xor for SP-FP Data (SSE). 
+ inline void xorps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_XORPS, &dst, &src); + } + //! @brief Bit-wise Logical Xor for SP-FP Data (SSE). + inline void xorps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_XORPS, &dst, &src); + } + + // -------------------------------------------------------------------------- + // [SSE2] + // -------------------------------------------------------------------------- + + //! @brief Packed DP-FP Add (SSE2). + inline void addpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_ADDPD, &dst, &src); + } + //! @brief Packed DP-FP Add (SSE2). + inline void addpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_ADDPD, &dst, &src); + } + + //! @brief Scalar DP-FP Add (SSE2). + inline void addsd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_ADDSD, &dst, &src); + } + //! @brief Scalar DP-FP Add (SSE2). + inline void addsd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_ADDSD, &dst, &src); + } + + //! @brief Bit-wise Logical And Not For DP-FP (SSE2). + inline void andnpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_ANDNPD, &dst, &src); + } + //! @brief Bit-wise Logical And Not For DP-FP (SSE2). + inline void andnpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_ANDNPD, &dst, &src); + } + + //! @brief Bit-wise Logical And For DP-FP (SSE2). + inline void andpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_ANDPD, &dst, &src); + } + //! @brief Bit-wise Logical And For DP-FP (SSE2). + inline void andpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_ANDPD, &dst, &src); + } + + //! @brief Flush Cache Line (SSE2). + inline void clflush(const Mem& mem) + { + _emitInstruction(INST_CLFLUSH, &mem); + } + + //! @brief Packed DP-FP Compare (SSE2). 
+  inline void cmppd(const XMMReg& dst, const XMMReg& src, const Imm& imm8)
+  {
+    _emitInstruction(INST_CMPPD, &dst, &src, &imm8);
+  }
+  //! @brief Packed DP-FP Compare (SSE2).
+  inline void cmppd(const XMMReg& dst, const Mem& src, const Imm& imm8)
+  {
+    _emitInstruction(INST_CMPPD, &dst, &src, &imm8);
+  }
+
+  //! @brief Compare Scalar DP-FP Values (SSE2).
+  inline void cmpsd(const XMMReg& dst, const XMMReg& src, const Imm& imm8)
+  {
+    _emitInstruction(INST_CMPSD, &dst, &src, &imm8);
+  }
+  //! @brief Compare Scalar DP-FP Values (SSE2).
+  inline void cmpsd(const XMMReg& dst, const Mem& src, const Imm& imm8)
+  {
+    _emitInstruction(INST_CMPSD, &dst, &src, &imm8);
+  }
+
+  //! @brief Scalar Ordered DP-FP Compare and Set EFLAGS (SSE2).
+  inline void comisd(const XMMReg& dst, const XMMReg& src)
+  {
+    _emitInstruction(INST_COMISD, &dst, &src);
+  }
+  //! @brief Scalar Ordered DP-FP Compare and Set EFLAGS (SSE2).
+  inline void comisd(const XMMReg& dst, const Mem& src)
+  {
+    _emitInstruction(INST_COMISD, &dst, &src);
+  }
+
+  //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
+  inline void cvtdq2pd(const XMMReg& dst, const XMMReg& src)
+  {
+    _emitInstruction(INST_CVTDQ2PD, &dst, &src);
+  }
+  //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
+  inline void cvtdq2pd(const XMMReg& dst, const Mem& src)
+  {
+    _emitInstruction(INST_CVTDQ2PD, &dst, &src);
+  }
+
+  //! @brief Convert Packed Dword Integers to Packed SP-FP Values (SSE2).
+  inline void cvtdq2ps(const XMMReg& dst, const XMMReg& src)
+  {
+    _emitInstruction(INST_CVTDQ2PS, &dst, &src);
+  }
+  //! @brief Convert Packed Dword Integers to Packed SP-FP Values (SSE2).
+  inline void cvtdq2ps(const XMMReg& dst, const Mem& src)
+  {
+    _emitInstruction(INST_CVTDQ2PS, &dst, &src);
+  }
+
+  //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
+  inline void cvtpd2dq(const XMMReg& dst, const XMMReg& src)
+  {
+    _emitInstruction(INST_CVTPD2DQ, &dst, &src);
+  }
+  //!
@brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2). + inline void cvtpd2dq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTPD2DQ, &dst, &src); + } + + //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2). + inline void cvtpd2pi(const MMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTPD2PI, &dst, &src); + } + //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2). + inline void cvtpd2pi(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTPD2PI, &dst, &src); + } + + //! @brief Convert Packed DP-FP Values to Packed SP-FP Values (SSE2). + inline void cvtpd2ps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTPD2PS, &dst, &src); + } + //! @brief Convert Packed DP-FP Values to Packed SP-FP Values (SSE2). + inline void cvtpd2ps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTPD2PS, &dst, &src); + } + + //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2). + inline void cvtpi2pd(const XMMReg& dst, const MMReg& src) + { + _emitInstruction(INST_CVTPI2PD, &dst, &src); + } + //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2). + inline void cvtpi2pd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTPI2PD, &dst, &src); + } + + //! @brief Convert Packed SP-FP Values to Packed Dword Integers (SSE2). + inline void cvtps2dq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTPS2DQ, &dst, &src); + } + //! @brief Convert Packed SP-FP Values to Packed Dword Integers (SSE2). + inline void cvtps2dq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTPS2DQ, &dst, &src); + } + + //! @brief Convert Packed SP-FP Values to Packed DP-FP Values (SSE2). + inline void cvtps2pd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTPS2PD, &dst, &src); + } + //! @brief Convert Packed SP-FP Values to Packed DP-FP Values (SSE2). 
+ inline void cvtps2pd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTPS2PD, &dst, &src); + } + + //! @brief Convert Scalar DP-FP Value to Dword Integer (SSE2). + inline void cvtsd2si(const GPReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTSD2SI, &dst, &src); + } + //! @brief Convert Scalar DP-FP Value to Dword Integer (SSE2). + inline void cvtsd2si(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTSD2SI, &dst, &src); + } + + //! @brief Convert Scalar DP-FP Value to Scalar SP-FP Value (SSE2). + inline void cvtsd2ss(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTSD2SS, &dst, &src); + } + //! @brief Convert Scalar DP-FP Value to Scalar SP-FP Value (SSE2). + inline void cvtsd2ss(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTSD2SS, &dst, &src); + } + + //! @brief Convert Dword Integer to Scalar DP-FP Value (SSE2). + inline void cvtsi2sd(const XMMReg& dst, const GPReg& src) + { + _emitInstruction(INST_CVTSI2SD, &dst, &src); + } + //! @brief Convert Dword Integer to Scalar DP-FP Value (SSE2). + inline void cvtsi2sd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTSI2SD, &dst, &src); + } + + //! @brief Convert Scalar SP-FP Value to Scalar DP-FP Value (SSE2). + inline void cvtss2sd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTSS2SD, &dst, &src); + } + //! @brief Convert Scalar SP-FP Value to Scalar DP-FP Value (SSE2). + inline void cvtss2sd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTSS2SD, &dst, &src); + } + + //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2). + inline void cvttpd2pi(const MMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTTPD2PI, &dst, &src); + } + //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2). + inline void cvttpd2pi(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTTPD2PI, &dst, &src); + } + + //! 
@brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2). + inline void cvttpd2dq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTTPD2DQ, &dst, &src); + } + //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2). + inline void cvttpd2dq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTTPD2DQ, &dst, &src); + } + + //! @brief Convert with Truncation Packed SP-FP Values to Packed Dword Integers (SSE2). + inline void cvttps2dq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTTPS2DQ, &dst, &src); + } + //! @brief Convert with Truncation Packed SP-FP Values to Packed Dword Integers (SSE2). + inline void cvttps2dq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTTPS2DQ, &dst, &src); + } + + //! @brief Convert with Truncation Scalar DP-FP Value to Signed Dword Integer (SSE2). + inline void cvttsd2si(const GPReg& dst, const XMMReg& src) + { + _emitInstruction(INST_CVTTSD2SI, &dst, &src); + } + //! @brief Convert with Truncation Scalar DP-FP Value to Signed Dword Integer (SSE2). + inline void cvttsd2si(const GPReg& dst, const Mem& src) + { + _emitInstruction(INST_CVTTSD2SI, &dst, &src); + } + + //! @brief Packed DP-FP Divide (SSE2). + inline void divpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_DIVPD, &dst, &src); + } + //! @brief Packed DP-FP Divide (SSE2). + inline void divpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_DIVPD, &dst, &src); + } + + //! @brief Scalar DP-FP Divide (SSE2). + inline void divsd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_DIVSD, &dst, &src); + } + //! @brief Scalar DP-FP Divide (SSE2). + inline void divsd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_DIVSD, &dst, &src); + } + + //! @brief Load Fence (SSE2). + inline void lfence() + { + _emitInstruction(INST_LFENCE); + } + + //! @brief Store Selected Bytes of Double Quadword (SSE2). + //! 
+ //! @note Target is DS:EDI. + inline void maskmovdqu(const XMMReg& src, const XMMReg& mask) + { + _emitInstruction(INST_MASKMOVDQU, &src, &mask); + } + + //! @brief Return Maximum Packed Double-Precision FP Values (SSE2). + inline void maxpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MAXPD, &dst, &src); + } + //! @brief Return Maximum Packed Double-Precision FP Values (SSE2). + inline void maxpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MAXPD, &dst, &src); + } + + //! @brief Return Maximum Scalar Double-Precision FP Value (SSE2). + inline void maxsd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MAXSD, &dst, &src); + } + //! @brief Return Maximum Scalar Double-Precision FP Value (SSE2). + inline void maxsd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MAXSD, &dst, &src); + } + + //! @brief Memory Fence (SSE2). + inline void mfence() + { + _emitInstruction(INST_MFENCE); + } + + //! @brief Return Minimum Packed DP-FP Values (SSE2). + inline void minpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MINPD, &dst, &src); + } + //! @brief Return Minimum Packed DP-FP Values (SSE2). + inline void minpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MINPD, &dst, &src); + } + + //! @brief Return Minimum Scalar DP-FP Value (SSE2). + inline void minsd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MINSD, &dst, &src); + } + //! @brief Return Minimum Scalar DP-FP Value (SSE2). + inline void minsd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MINSD, &dst, &src); + } + + //! @brief Move Aligned DQWord (SSE2). + inline void movdqa(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVDQA, &dst, &src); + } + //! @brief Move Aligned DQWord (SSE2). + inline void movdqa(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVDQA, &dst, &src); + } + + //! @brief Move Aligned DQWord (SSE2). 
+ inline void movdqa(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVDQA, &dst, &src); + } + + //! @brief Move Unaligned Double Quadword (SSE2). + inline void movdqu(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVDQU, &dst, &src); + } + //! @brief Move Unaligned Double Quadword (SSE2). + inline void movdqu(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVDQU, &dst, &src); + } + + //! @brief Move Unaligned Double Quadword (SSE2). + inline void movdqu(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVDQU, &dst, &src); + } + + //! @brief Extract Packed SP-FP Sign Mask (SSE2). + inline void movmskps(const GPReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVMSKPS, &dst, &src); + } + + //! @brief Extract Packed DP-FP Sign Mask (SSE2). + inline void movmskpd(const GPReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVMSKPD, &dst, &src); + } + + //! @brief Move Scalar Double-Precision FP Value (SSE2). + inline void movsd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVSD, &dst, &src); + } + //! @brief Move Scalar Double-Precision FP Value (SSE2). + inline void movsd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVSD, &dst, &src); + } + + //! @brief Move Scalar Double-Precision FP Value (SSE2). + inline void movsd(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVSD, &dst, &src); + } + + //! @brief Move Aligned Packed Double-Precision FP Values (SSE2). + inline void movapd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVAPD, &dst, &src); + } + + //! @brief Move Aligned Packed Double-Precision FP Values (SSE2). + inline void movapd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVAPD, &dst, &src); + } + + //! @brief Move Aligned Packed Double-Precision FP Values (SSE2). + inline void movapd(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVAPD, &dst, &src); + } + + //! 
@brief Move Quadword from XMM to MMX Technology Register (SSE2). + inline void movdq2q(const MMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVDQ2Q, &dst, &src); + } + + //! @brief Move Quadword from MMX Technology to XMM Register (SSE2). + inline void movq2dq(const XMMReg& dst, const MMReg& src) + { + _emitInstruction(INST_MOVQ2DQ, &dst, &src); + } + + //! @brief Move High Packed Double-Precision FP Value (SSE2). + inline void movhpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVHPD, &dst, &src); + } + + //! @brief Move High Packed Double-Precision FP Value (SSE2). + inline void movhpd(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVHPD, &dst, &src); + } + + //! @brief Move Low Packed Double-Precision FP Value (SSE2). + inline void movlpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVLPD, &dst, &src); + } + + //! @brief Move Low Packed Double-Precision FP Value (SSE2). + inline void movlpd(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVLPD, &dst, &src); + } + + //! @brief Store Double Quadword Using Non-Temporal Hint (SSE2). + inline void movntdq(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVNTDQ, &dst, &src); + } + + //! @brief Store Store DWORD Using Non-Temporal Hint (SSE2). + inline void movnti(const Mem& dst, const GPReg& src) + { + _emitInstruction(INST_MOVNTI, &dst, &src); + } + + //! @brief Store Packed Double-Precision FP Values Using Non-Temporal Hint (SSE2). + inline void movntpd(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVNTPD, &dst, &src); + } + + //! @brief Move Unaligned Packed Double-Precision FP Values (SSE2). + inline void movupd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVUPD, &dst, &src); + } + + //! @brief Move Unaligned Packed Double-Precision FP Values (SSE2). + inline void movupd(const Mem& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVUPD, &dst, &src); + } + + //! 
@brief Packed DP-FP Multiply (SSE2). + inline void mulpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MULPD, &dst, &src); + } + //! @brief Packed DP-FP Multiply (SSE2). + inline void mulpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MULPD, &dst, &src); + } + + //! @brief Scalar DP-FP Multiply (SSE2). + inline void mulsd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MULSD, &dst, &src); + } + //! @brief Scalar DP-FP Multiply (SSE2). + inline void mulsd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MULSD, &dst, &src); + } + + //! @brief Bit-wise Logical OR for DP-FP Data (SSE2). + inline void orpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_ORPD, &dst, &src); + } + //! @brief Bit-wise Logical OR for DP-FP Data (SSE2). + inline void orpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_ORPD, &dst, &src); + } + + //! @brief Pack with Signed Saturation (SSE2). + inline void packsswb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PACKSSWB, &dst, &src); + } + //! @brief Pack with Signed Saturation (SSE2). + inline void packsswb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PACKSSWB, &dst, &src); + } + + //! @brief Pack with Signed Saturation (SSE2). + inline void packssdw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PACKSSDW, &dst, &src); + } + //! @brief Pack with Signed Saturation (SSE2). + inline void packssdw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PACKSSDW, &dst, &src); + } + + //! @brief Pack with Unsigned Saturation (SSE2). + inline void packuswb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PACKUSWB, &dst, &src); + } + //! @brief Pack with Unsigned Saturation (SSE2). + inline void packuswb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PACKUSWB, &dst, &src); + } + + //! @brief Packed BYTE Add (SSE2). 
+ inline void paddb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PADDB, &dst, &src); + } + //! @brief Packed BYTE Add (SSE2). + inline void paddb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDB, &dst, &src); + } + + //! @brief Packed WORD Add (SSE2). + inline void paddw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PADDW, &dst, &src); + } + //! @brief Packed WORD Add (SSE2). + inline void paddw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDW, &dst, &src); + } + + //! @brief Packed DWORD Add (SSE2). + inline void paddd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PADDD, &dst, &src); + } + //! @brief Packed DWORD Add (SSE2). + inline void paddd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDD, &dst, &src); + } + + //! @brief Packed QWORD Add (SSE2). + inline void paddq(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PADDQ, &dst, &src); + } + //! @brief Packed QWORD Add (SSE2). + inline void paddq(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDQ, &dst, &src); + } + + //! @brief Packed QWORD Add (SSE2). + inline void paddq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PADDQ, &dst, &src); + } + //! @brief Packed QWORD Add (SSE2). + inline void paddq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDQ, &dst, &src); + } + + //! @brief Packed Add with Saturation (SSE2). + inline void paddsb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PADDSB, &dst, &src); + } + //! @brief Packed Add with Saturation (SSE2). + inline void paddsb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDSB, &dst, &src); + } + + //! @brief Packed Add with Saturation (SSE2). + inline void paddsw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PADDSW, &dst, &src); + } + //! @brief Packed Add with Saturation (SSE2). 
+ inline void paddsw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDSW, &dst, &src); + } + + //! @brief Packed Add Unsigned with Saturation (SSE2). + inline void paddusb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PADDUSB, &dst, &src); + } + //! @brief Packed Add Unsigned with Saturation (SSE2). + inline void paddusb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDUSB, &dst, &src); + } + + //! @brief Packed Add Unsigned with Saturation (SSE2). + inline void paddusw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PADDUSW, &dst, &src); + } + //! @brief Packed Add Unsigned with Saturation (SSE2). + inline void paddusw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PADDUSW, &dst, &src); + } + + //! @brief Logical AND (SSE2). + inline void pand(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PAND, &dst, &src); + } + //! @brief Logical AND (SSE2). + inline void pand(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PAND, &dst, &src); + } + + //! @brief Logical AND Not (SSE2). + inline void pandn(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PANDN, &dst, &src); + } + //! @brief Logical AND Not (SSE2). + inline void pandn(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PANDN, &dst, &src); + } + + //! @brief Spin Loop Hint (SSE2). + inline void pause() + { + _emitInstruction(INST_PAUSE); + } + + //! @brief Packed Average (SSE2). + inline void pavgb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PAVGB, &dst, &src); + } + //! @brief Packed Average (SSE2). + inline void pavgb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PAVGB, &dst, &src); + } + + //! @brief Packed Average (SSE2). + inline void pavgw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PAVGW, &dst, &src); + } + //! @brief Packed Average (SSE2). 
+ inline void pavgw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PAVGW, &dst, &src); + } + + //! @brief Packed Compare for Equal (BYTES) (SSE2). + inline void pcmpeqb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PCMPEQB, &dst, &src); + } + //! @brief Packed Compare for Equal (BYTES) (SSE2). + inline void pcmpeqb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPEQB, &dst, &src); + } + + //! @brief Packed Compare for Equal (WORDS) (SSE2). + inline void pcmpeqw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PCMPEQW, &dst, &src); + } + //! @brief Packed Compare for Equal (WORDS) (SSE2). + inline void pcmpeqw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPEQW, &dst, &src); + } + + //! @brief Packed Compare for Equal (DWORDS) (SSE2). + inline void pcmpeqd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PCMPEQD, &dst, &src); + } + //! @brief Packed Compare for Equal (DWORDS) (SSE2). + inline void pcmpeqd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPEQD, &dst, &src); + } + + //! @brief Packed Compare for Greater Than (BYTES) (SSE2). + inline void pcmpgtb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PCMPGTB, &dst, &src); + } + //! @brief Packed Compare for Greater Than (BYTES) (SSE2). + inline void pcmpgtb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPGTB, &dst, &src); + } + + //! @brief Packed Compare for Greater Than (WORDS) (SSE2). + inline void pcmpgtw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PCMPGTW, &dst, &src); + } + //! @brief Packed Compare for Greater Than (WORDS) (SSE2). + inline void pcmpgtw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPGTW, &dst, &src); + } + + //! @brief Packed Compare for Greater Than (DWORDS) (SSE2). + inline void pcmpgtd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PCMPGTD, &dst, &src); + } + //! 
@brief Packed Compare for Greater Than (DWORDS) (SSE2). + inline void pcmpgtd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPGTD, &dst, &src); + } + + //! @brief Packed Signed Integer Word Maximum (SSE2). + inline void pmaxsw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMAXSW, &dst, &src); + } + //! @brief Packed Signed Integer Word Maximum (SSE2). + inline void pmaxsw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMAXSW, &dst, &src); + } + + //! @brief Packed Unsigned Integer Byte Maximum (SSE2). + inline void pmaxub(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMAXUB, &dst, &src); + } + //! @brief Packed Unsigned Integer Byte Maximum (SSE2). + inline void pmaxub(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMAXUB, &dst, &src); + } + + //! @brief Packed Signed Integer Word Minimum (SSE2). + inline void pminsw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMINSW, &dst, &src); + } + //! @brief Packed Signed Integer Word Minimum (SSE2). + inline void pminsw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMINSW, &dst, &src); + } + + //! @brief Packed Unsigned Integer Byte Minimum (SSE2). + inline void pminub(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMINUB, &dst, &src); + } + //! @brief Packed Unsigned Integer Byte Minimum (SSE2). + inline void pminub(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMINUB, &dst, &src); + } + + //! @brief Move Byte Mask (SSE2). + inline void pmovmskb(const GPReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVMSKB, &dst, &src); + } + + //! @brief Packed Multiply High (SSE2). + inline void pmulhw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMULHW, &dst, &src); + } + //! @brief Packed Multiply High (SSE2). + inline void pmulhw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMULHW, &dst, &src); + } + + //! 
@brief Packed Multiply High Unsigned (SSE2). + inline void pmulhuw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMULHUW, &dst, &src); + } + //! @brief Packed Multiply High Unsigned (SSE2). + inline void pmulhuw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMULHUW, &dst, &src); + } + + //! @brief Packed Multiply Low (SSE2). + inline void pmullw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMULLW, &dst, &src); + } + //! @brief Packed Multiply Low (SSE2). + inline void pmullw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMULLW, &dst, &src); + } + + //! @brief Packed Multiply to QWORD (SSE2). + inline void pmuludq(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PMULUDQ, &dst, &src); + } + //! @brief Packed Multiply to QWORD (SSE2). + inline void pmuludq(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMULUDQ, &dst, &src); + } + + //! @brief Packed Multiply to QWORD (SSE2). + inline void pmuludq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMULUDQ, &dst, &src); + } + //! @brief Packed Multiply to QWORD (SSE2). + inline void pmuludq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMULUDQ, &dst, &src); + } + + //! @brief Bitwise Logical OR (SSE2). + inline void por(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_POR, &dst, &src); + } + //! @brief Bitwise Logical OR (SSE2). + inline void por(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_POR, &dst, &src); + } + + //! @brief Packed Shift Left Logical (SSE2). + inline void pslld(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSLLD, &dst, &src); + } + //! @brief Packed Shift Left Logical (SSE2). + inline void pslld(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSLLD, &dst, &src); + } + //! @brief Packed Shift Left Logical (SSE2). 
+ inline void pslld(const XMMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSLLD, &dst, &src); + } + + //! @brief Packed Shift Left Logical (SSE2). + inline void psllq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSLLQ, &dst, &src); + } + //! @brief Packed Shift Left Logical (SSE2). + inline void psllq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSLLQ, &dst, &src); + } + //! @brief Packed Shift Left Logical (SSE2). + inline void psllq(const XMMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSLLQ, &dst, &src); + } + + //! @brief Packed Shift Left Logical (SSE2). + inline void psllw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSLLW, &dst, &src); + } + //! @brief Packed Shift Left Logical (SSE2). + inline void psllw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSLLW, &dst, &src); + } + //! @brief Packed Shift Left Logical (SSE2). + inline void psllw(const XMMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSLLW, &dst, &src); + } + + //! @brief Packed Shift Left Logical (SSE2). + inline void pslldq(const XMMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSLLDQ, &dst, &src); + } + + //! @brief Packed Shift Right Arithmetic (SSE2). + inline void psrad(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSRAD, &dst, &src); + } + //! @brief Packed Shift Right Arithmetic (SSE2). + inline void psrad(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSRAD, &dst, &src); + } + //! @brief Packed Shift Right Arithmetic (SSE2). + inline void psrad(const XMMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSRAD, &dst, &src); + } + + //! @brief Packed Shift Right Arithmetic (SSE2). + inline void psraw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSRAW, &dst, &src); + } + //! @brief Packed Shift Right Arithmetic (SSE2). 
+ inline void psraw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSRAW, &dst, &src); + } + //! @brief Packed Shift Right Arithmetic (SSE2). + inline void psraw(const XMMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSRAW, &dst, &src); + } + + //! @brief Packed Subtract (SSE2). + inline void psubb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSUBB, &dst, &src); + } + //! @brief Packed Subtract (SSE2). + inline void psubb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBB, &dst, &src); + } + + //! @brief Packed Subtract (SSE2). + inline void psubw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSUBW, &dst, &src); + } + //! @brief Packed Subtract (SSE2). + inline void psubw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBW, &dst, &src); + } + + //! @brief Packed Subtract (SSE2). + inline void psubd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSUBD, &dst, &src); + } + //! @brief Packed Subtract (SSE2). + inline void psubd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBD, &dst, &src); + } + + //! @brief Packed Subtract (SSE2). + inline void psubq(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSUBQ, &dst, &src); + } + //! @brief Packed Subtract (SSE2). + inline void psubq(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBQ, &dst, &src); + } + + //! @brief Packed Subtract (SSE2). + inline void psubq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSUBQ, &dst, &src); + } + //! @brief Packed Subtract (SSE2). + inline void psubq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBQ, &dst, &src); + } + + //! @brief Packed Multiply and Add (SSE2). + inline void pmaddwd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMADDWD, &dst, &src); + } + //! @brief Packed Multiply and Add (SSE2). 
+ inline void pmaddwd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMADDWD, &dst, &src); + } + + //! @brief Shuffle Packed DWORDs (SSE2). + inline void pshufd(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PSHUFD, &dst, &src, &imm8); + } + //! @brief Shuffle Packed DWORDs (SSE2). + inline void pshufd(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PSHUFD, &dst, &src, &imm8); + } + + //! @brief Shuffle Packed High Words (SSE2). + inline void pshufhw(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PSHUFHW, &dst, &src, &imm8); + } + //! @brief Shuffle Packed High Words (SSE2). + inline void pshufhw(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PSHUFHW, &dst, &src, &imm8); + } + + //! @brief Shuffle Packed Low Words (SSE2). + inline void pshuflw(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PSHUFLW, &dst, &src, &imm8); + } + //! @brief Shuffle Packed Low Words (SSE2). + inline void pshuflw(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PSHUFLW, &dst, &src, &imm8); + } + + //! @brief Packed Shift Right Logical (SSE2). + inline void psrld(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSRLD, &dst, &src); + } + //! @brief Packed Shift Right Logical (SSE2). + inline void psrld(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSRLD, &dst, &src); + } + //! @brief Packed Shift Right Logical (SSE2). + inline void psrld(const XMMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSRLD, &dst, &src); + } + + //! @brief Packed Shift Right Logical (SSE2). + inline void psrlq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSRLQ, &dst, &src); + } + //! @brief Packed Shift Right Logical (SSE2). + inline void psrlq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSRLQ, &dst, &src); + } + //! 
@brief Packed Shift Right Logical (SSE2). + inline void psrlq(const XMMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSRLQ, &dst, &src); + } + + //! @brief DQWord Shift Right Logical (MMX). + inline void psrldq(const XMMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSRLDQ, &dst, &src); + } + + //! @brief Packed Shift Right Logical (SSE2). + inline void psrlw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSRLW, &dst, &src); + } + //! @brief Packed Shift Right Logical (SSE2). + inline void psrlw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSRLW, &dst, &src); + } + //! @brief Packed Shift Right Logical (SSE2). + inline void psrlw(const XMMReg& dst, const Imm& src) + { + _emitInstruction(INST_PSRLW, &dst, &src); + } + + //! @brief Packed Subtract with Saturation (SSE2). + inline void psubsb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSUBSB, &dst, &src); + } + //! @brief Packed Subtract with Saturation (SSE2). + inline void psubsb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBSB, &dst, &src); + } + + //! @brief Packed Subtract with Saturation (SSE2). + inline void psubsw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSUBSW, &dst, &src); + } + //! @brief Packed Subtract with Saturation (SSE2). + inline void psubsw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBSW, &dst, &src); + } + + //! @brief Packed Subtract with Unsigned Saturation (SSE2). + inline void psubusb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSUBUSB, &dst, &src); + } + //! @brief Packed Subtract with Unsigned Saturation (SSE2). + inline void psubusb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBUSB, &dst, &src); + } + + //! @brief Packed Subtract with Unsigned Saturation (SSE2). + inline void psubusw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSUBUSW, &dst, &src); + } + //! 
@brief Packed Subtract with Unsigned Saturation (SSE2). + inline void psubusw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSUBUSW, &dst, &src); + } + + //! @brief Unpack High Data (SSE2). + inline void punpckhbw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PUNPCKHBW, &dst, &src); + } + //! @brief Unpack High Data (SSE2). + inline void punpckhbw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKHBW, &dst, &src); + } + + //! @brief Unpack High Data (SSE2). + inline void punpckhwd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PUNPCKHWD, &dst, &src); + } + //! @brief Unpack High Data (SSE2). + inline void punpckhwd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKHWD, &dst, &src); + } + + //! @brief Unpack High Data (SSE2). + inline void punpckhdq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PUNPCKHDQ, &dst, &src); + } + //! @brief Unpack High Data (SSE2). + inline void punpckhdq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKHDQ, &dst, &src); + } + + //! @brief Unpack High Data (SSE2). + inline void punpckhqdq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PUNPCKHQDQ, &dst, &src); + } + //! @brief Unpack High Data (SSE2). + inline void punpckhqdq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKHQDQ, &dst, &src); + } + + //! @brief Unpack Low Data (SSE2). + inline void punpcklbw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PUNPCKLBW, &dst, &src); + } + //! @brief Unpack Low Data (SSE2). + inline void punpcklbw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKLBW, &dst, &src); + } + + //! @brief Unpack Low Data (SSE2). + inline void punpcklwd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PUNPCKLWD, &dst, &src); + } + //! @brief Unpack Low Data (SSE2). 
+ inline void punpcklwd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKLWD, &dst, &src); + } + + //! @brief Unpack Low Data (SSE2). + inline void punpckldq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PUNPCKLDQ, &dst, &src); + } + //! @brief Unpack Low Data (SSE2). + inline void punpckldq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKLDQ, &dst, &src); + } + + //! @brief Unpack Low Data (SSE2). + inline void punpcklqdq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PUNPCKLQDQ, &dst, &src); + } + //! @brief Unpack Low Data (SSE2). + inline void punpcklqdq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKLQDQ, &dst, &src); + } + + //! @brief Bitwise Exclusive OR (SSE2). + inline void pxor(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PXOR, &dst, &src); + } + //! @brief Bitwise Exclusive OR (SSE2). + inline void pxor(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PXOR, &dst, &src); + } + + //! @brief Shuffle DP-FP (SSE2). + inline void shufpd(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_SHUFPD, &dst, &src, &imm8); + } + //! @brief Shuffle DP-FP (SSE2). + inline void shufpd(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_SHUFPD, &dst, &src, &imm8); + } + + //! @brief Compute Square Roots of Packed DP-FP Values (SSE2). + inline void sqrtpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_SQRTPD, &dst, &src); + } + //! @brief Compute Square Roots of Packed DP-FP Values (SSE2). + inline void sqrtpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_SQRTPD, &dst, &src); + } + + //! @brief Compute Square Root of Scalar DP-FP Value (SSE2). + inline void sqrtsd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_SQRTSD, &dst, &src); + } + //! @brief Compute Square Root of Scalar DP-FP Value (SSE2). 
+ inline void sqrtsd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_SQRTSD, &dst, &src); + } + + //! @brief Packed DP-FP Subtract (SSE2). + inline void subpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_SUBPD, &dst, &src); + } + //! @brief Packed DP-FP Subtract (SSE2). + inline void subpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_SUBPD, &dst, &src); + } + + //! @brief Scalar DP-FP Subtract (SSE2). + inline void subsd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_SUBSD, &dst, &src); + } + //! @brief Scalar DP-FP Subtract (SSE2). + inline void subsd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_SUBSD, &dst, &src); + } + + //! @brief Scalar Unordered DP-FP Compare and Set EFLAGS (SSE2). + inline void ucomisd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_UCOMISD, &dst, &src); + } + //! @brief Scalar Unordered DP-FP Compare and Set EFLAGS (SSE2). + inline void ucomisd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_UCOMISD, &dst, &src); + } + + //! @brief Unpack and Interleave High Packed Double-Precision FP Values (SSE2). + inline void unpckhpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_UNPCKHPD, &dst, &src); + } + //! @brief Unpack and Interleave High Packed Double-Precision FP Values (SSE2). + inline void unpckhpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_UNPCKHPD, &dst, &src); + } + + //! @brief Unpack and Interleave Low Packed Double-Precision FP Values (SSE2). + inline void unpcklpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_UNPCKLPD, &dst, &src); + } + //! @brief Unpack and Interleave Low Packed Double-Precision FP Values (SSE2). + inline void unpcklpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_UNPCKLPD, &dst, &src); + } + + //! @brief Bit-wise Logical XOR for DP-FP Data (SSE2).
+ inline void xorpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_XORPD, &dst, &src); + } + //! @brief Bit-wise Logical XOR for DP-FP Data (SSE2). + inline void xorpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_XORPD, &dst, &src); + } + + // -------------------------------------------------------------------------- + // [SSE3] + // -------------------------------------------------------------------------- + + //! @brief Packed DP-FP Add/Subtract (SSE3). + inline void addsubpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_ADDSUBPD, &dst, &src); + } + //! @brief Packed DP-FP Add/Subtract (SSE3). + inline void addsubpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_ADDSUBPD, &dst, &src); + } + + //! @brief Packed SP-FP Add/Subtract (SSE3). + inline void addsubps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_ADDSUBPS, &dst, &src); + } + //! @brief Packed SP-FP Add/Subtract (SSE3). + inline void addsubps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_ADDSUBPS, &dst, &src); + } + + //! @brief Store Integer with Truncation (SSE3). + inline void fisttp(const Mem& dst) + { + _emitInstruction(INST_FISTTP, &dst); + } + + //! @brief Packed DP-FP Horizontal Add (SSE3). + inline void haddpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_HADDPD, &dst, &src); + } + //! @brief Packed DP-FP Horizontal Add (SSE3). + inline void haddpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_HADDPD, &dst, &src); + } + + //! @brief Packed SP-FP Horizontal Add (SSE3). + inline void haddps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_HADDPS, &dst, &src); + } + //! @brief Packed SP-FP Horizontal Add (SSE3). + inline void haddps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_HADDPS, &dst, &src); + } + + //! @brief Packed DP-FP Horizontal Subtract (SSE3).
+ inline void hsubpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_HSUBPD, &dst, &src); + } + //! @brief Packed DP-FP Horizontal Subtract (SSE3). + inline void hsubpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_HSUBPD, &dst, &src); + } + + //! @brief Packed SP-FP Horizontal Subtract (SSE3). + inline void hsubps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_HSUBPS, &dst, &src); + } + //! @brief Packed SP-FP Horizontal Subtract (SSE3). + inline void hsubps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_HSUBPS, &dst, &src); + } + + //! @brief Load Unaligned Integer 128 Bits (SSE3). + inline void lddqu(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_LDDQU, &dst, &src); + } + + //! @brief Set Up Monitor Address (SSE3). + inline void monitor() + { + _emitInstruction(INST_MONITOR); + } + + //! @brief Move One DP-FP and Duplicate (SSE3). + inline void movddup(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVDDUP, &dst, &src); + } + //! @brief Move One DP-FP and Duplicate (SSE3). + inline void movddup(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVDDUP, &dst, &src); + } + + //! @brief Move Packed SP-FP High and Duplicate (SSE3). + inline void movshdup(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVSHDUP, &dst, &src); + } + //! @brief Move Packed SP-FP High and Duplicate (SSE3). + inline void movshdup(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVSHDUP, &dst, &src); + } + + //! @brief Move Packed SP-FP Low and Duplicate (SSE3). + inline void movsldup(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_MOVSLDUP, &dst, &src); + } + //! @brief Move Packed SP-FP Low and Duplicate (SSE3). + inline void movsldup(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVSLDUP, &dst, &src); + } + + //! @brief Monitor Wait (SSE3). 
+ inline void mwait() + { + _emitInstruction(INST_MWAIT); + } + + // -------------------------------------------------------------------------- + // [SSSE3] + // -------------------------------------------------------------------------- + + //! @brief Packed SIGN (SSSE3). + inline void psignb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSIGNB, &dst, &src); + } + //! @brief Packed SIGN (SSSE3). + inline void psignb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSIGNB, &dst, &src); + } + + //! @brief Packed SIGN (SSSE3). + inline void psignb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSIGNB, &dst, &src); + } + //! @brief Packed SIGN (SSSE3). + inline void psignb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSIGNB, &dst, &src); + } + + //! @brief Packed SIGN (SSSE3). + inline void psignw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSIGNW, &dst, &src); + } + //! @brief Packed SIGN (SSSE3). + inline void psignw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSIGNW, &dst, &src); + } + + //! @brief Packed SIGN (SSSE3). + inline void psignw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSIGNW, &dst, &src); + } + //! @brief Packed SIGN (SSSE3). + inline void psignw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSIGNW, &dst, &src); + } + + //! @brief Packed SIGN (SSSE3). + inline void psignd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSIGND, &dst, &src); + } + //! @brief Packed SIGN (SSSE3). + inline void psignd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSIGND, &dst, &src); + } + + //! @brief Packed SIGN (SSSE3). + inline void psignd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSIGND, &dst, &src); + } + //! @brief Packed SIGN (SSSE3). + inline void psignd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSIGND, &dst, &src); + } + + //! 
@brief Packed Horizontal Add (SSSE3). + inline void phaddw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PHADDW, &dst, &src); + } + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHADDW, &dst, &src); + } + + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PHADDW, &dst, &src); + } + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHADDW, &dst, &src); + } + + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PHADDD, &dst, &src); + } + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHADDD, &dst, &src); + } + + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PHADDD, &dst, &src); + } + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHADDD, &dst, &src); + } + + //! @brief Packed Horizontal Add and Saturate (SSSE3). + inline void phaddsw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PHADDSW, &dst, &src); + } + //! @brief Packed Horizontal Add and Saturate (SSSE3). + inline void phaddsw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHADDSW, &dst, &src); + } + + //! @brief Packed Horizontal Add and Saturate (SSSE3). + inline void phaddsw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PHADDSW, &dst, &src); + } + //! @brief Packed Horizontal Add and Saturate (SSSE3). + inline void phaddsw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHADDSW, &dst, &src); + } + + //! @brief Packed Horizontal Subtract (SSSE3). 
+ inline void phsubw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PHSUBW, &dst, &src); + } + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHSUBW, &dst, &src); + } + + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PHSUBW, &dst, &src); + } + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHSUBW, &dst, &src); + } + + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PHSUBD, &dst, &src); + } + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHSUBD, &dst, &src); + } + + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PHSUBD, &dst, &src); + } + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHSUBD, &dst, &src); + } + + //! @brief Packed Horizontal Subtract and Saturate (SSSE3). + inline void phsubsw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PHSUBSW, &dst, &src); + } + //! @brief Packed Horizontal Subtract and Saturate (SSSE3). + inline void phsubsw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHSUBSW, &dst, &src); + } + + //! @brief Packed Horizontal Subtract and Saturate (SSSE3). + inline void phsubsw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PHSUBSW, &dst, &src); + } + //! @brief Packed Horizontal Subtract and Saturate (SSSE3). + inline void phsubsw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHSUBSW, &dst, &src); + } + + //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3). 
+ inline void pmaddubsw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PMADDUBSW, &dst, &src); + } + //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3). + inline void pmaddubsw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMADDUBSW, &dst, &src); + } + + //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3). + inline void pmaddubsw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMADDUBSW, &dst, &src); + } + //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3). + inline void pmaddubsw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMADDUBSW, &dst, &src); + } + + //! @brief Packed Absolute Value (SSSE3). + inline void pabsb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PABSB, &dst, &src); + } + //! @brief Packed Absolute Value (SSSE3). + inline void pabsb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PABSB, &dst, &src); + } + + //! @brief Packed Absolute Value (SSSE3). + inline void pabsb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PABSB, &dst, &src); + } + //! @brief Packed Absolute Value (SSSE3). + inline void pabsb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PABSB, &dst, &src); + } + + //! @brief Packed Absolute Value (SSSE3). + inline void pabsw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PABSW, &dst, &src); + } + //! @brief Packed Absolute Value (SSSE3). + inline void pabsw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PABSW, &dst, &src); + } + + //! @brief Packed Absolute Value (SSSE3). + inline void pabsw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PABSW, &dst, &src); + } + //! @brief Packed Absolute Value (SSSE3). + inline void pabsw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PABSW, &dst, &src); + } + + //! @brief Packed Absolute Value (SSSE3). 
+ inline void pabsd(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PABSD, &dst, &src); + } + //! @brief Packed Absolute Value (SSSE3). + inline void pabsd(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PABSD, &dst, &src); + } + + //! @brief Packed Absolute Value (SSSE3). + inline void pabsd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PABSD, &dst, &src); + } + //! @brief Packed Absolute Value (SSSE3). + inline void pabsd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PABSD, &dst, &src); + } + + //! @brief Packed Multiply High with Round and Scale (SSSE3). + inline void pmulhrsw(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PMULHRSW, &dst, &src); + } + //! @brief Packed Multiply High with Round and Scale (SSSE3). + inline void pmulhrsw(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMULHRSW, &dst, &src); + } + + //! @brief Packed Multiply High with Round and Scale (SSSE3). + inline void pmulhrsw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMULHRSW, &dst, &src); + } + //! @brief Packed Multiply High with Round and Scale (SSSE3). + inline void pmulhrsw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMULHRSW, &dst, &src); + } + + //! @brief Packed Shuffle Bytes (SSSE3). + inline void pshufb(const MMReg& dst, const MMReg& src) + { + _emitInstruction(INST_PSHUFB, &dst, &src); + } + //! @brief Packed Shuffle Bytes (SSSE3). + inline void pshufb(const MMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSHUFB, &dst, &src); + } + + //! @brief Packed Shuffle Bytes (SSSE3). + inline void pshufb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PSHUFB, &dst, &src); + } + //! @brief Packed Shuffle Bytes (SSSE3). + inline void pshufb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PSHUFB, &dst, &src); + } + + //! @brief Packed Align Right (SSSE3).
+ inline void palignr(const MMReg& dst, const MMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PALIGNR, &dst, &src, &imm8); + } + //! @brief Packed Align Right (SSSE3). + inline void palignr(const MMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PALIGNR, &dst, &src, &imm8); + } + + //! @brief Packed Align Right (SSSE3). + inline void palignr(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PALIGNR, &dst, &src, &imm8); + } + //! @brief Packed Align Right (SSSE3). + inline void palignr(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PALIGNR, &dst, &src, &imm8); + } + + // -------------------------------------------------------------------------- + // [SSE4.1] + // -------------------------------------------------------------------------- + + //! @brief Blend Packed DP-FP Values (SSE4.1). + inline void blendpd(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_BLENDPD, &dst, &src, &imm8); + } + //! @brief Blend Packed DP-FP Values (SSE4.1). + inline void blendpd(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_BLENDPD, &dst, &src, &imm8); + } + + //! @brief Blend Packed SP-FP Values (SSE4.1). + inline void blendps(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_BLENDPS, &dst, &src, &imm8); + } + //! @brief Blend Packed SP-FP Values (SSE4.1). + inline void blendps(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_BLENDPS, &dst, &src, &imm8); + } + + //! @brief Variable Blend Packed DP-FP Values (SSE4.1). + inline void blendvpd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_BLENDVPD, &dst, &src); + } + //! @brief Variable Blend Packed DP-FP Values (SSE4.1). + inline void blendvpd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_BLENDVPD, &dst, &src); + } + + //!
@brief Variable Blend Packed SP-FP Values (SSE4.1). + inline void blendvps(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_BLENDVPS, &dst, &src); + } + //! @brief Variable Blend Packed SP-FP Values (SSE4.1). + inline void blendvps(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_BLENDVPS, &dst, &src); + } + + //! @brief Dot Product of Packed DP-FP Values (SSE4.1). + inline void dppd(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_DPPD, &dst, &src, &imm8); + } + //! @brief Dot Product of Packed DP-FP Values (SSE4.1). + inline void dppd(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_DPPD, &dst, &src, &imm8); + } + + //! @brief Dot Product of Packed SP-FP Values (SSE4.1). + inline void dpps(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_DPPS, &dst, &src, &imm8); + } + //! @brief Dot Product of Packed SP-FP Values (SSE4.1). + inline void dpps(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_DPPS, &dst, &src, &imm8); + } + + //! @brief Extract Packed SP-FP Value (SSE4.1). + inline void extractps(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_EXTRACTPS, &dst, &src, &imm8); + } + //! @brief Extract Packed SP-FP Value (SSE4.1). + inline void extractps(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_EXTRACTPS, &dst, &src, &imm8); + } + + //! @brief Load Double Quadword Non-Temporal Aligned Hint (SSE4.1). + inline void movntdqa(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_MOVNTDQA, &dst, &src); + } + + //! @brief Compute Multiple Packed Sums of Absolute Difference (SSE4.1). + inline void mpsadbw(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_MPSADBW, &dst, &src, &imm8); + } + //! @brief Compute Multiple Packed Sums of Absolute Difference (SSE4.1). 
+ inline void mpsadbw(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_MPSADBW, &dst, &src, &imm8); + } + + //! @brief Pack with Unsigned Saturation (SSE4.1). + inline void packusdw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PACKUSDW, &dst, &src); + } + //! @brief Pack with Unsigned Saturation (SSE4.1). + inline void packusdw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PACKUSDW, &dst, &src); + } + + //! @brief Variable Blend Packed Bytes (SSE4.1). + inline void pblendvb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PBLENDVB, &dst, &src); + } + //! @brief Variable Blend Packed Bytes (SSE4.1). + inline void pblendvb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PBLENDVB, &dst, &src); + } + + //! @brief Blend Packed Words (SSE4.1). + inline void pblendw(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PBLENDW, &dst, &src, &imm8); + } + //! @brief Blend Packed Words (SSE4.1). + inline void pblendw(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PBLENDW, &dst, &src, &imm8); + } + + //! @brief Compare Packed Qword Data for Equal (SSE4.1). + inline void pcmpeqq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PCMPEQQ, &dst, &src); + } + //! @brief Compare Packed Qword Data for Equal (SSE4.1). + inline void pcmpeqq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPEQQ, &dst, &src); + } + + //! @brief Extract Byte (SSE4.1). + inline void pextrb(const GPReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRB, &dst, &src, &imm8); + } + //! @brief Extract Byte (SSE4.1). + inline void pextrb(const Mem& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRB, &dst, &src, &imm8); + } + + //! @brief Extract Dword (SSE4.1). 
+ inline void pextrd(const GPReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRD, &dst, &src, &imm8); + } + //! @brief Extract Dword (SSE4.1). + inline void pextrd(const Mem& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRD, &dst, &src, &imm8); + } + + //! @brief Extract Qword (SSE4.1). + inline void pextrq(const GPReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRQ, &dst, &src, &imm8); + } + //! @brief Extract Qword (SSE4.1). + inline void pextrq(const Mem& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRQ, &dst, &src, &imm8); + } + + //! @brief Extract Word (SSE4.1). + inline void pextrw(const GPReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRW, &dst, &src, &imm8); + } + //! @brief Extract Word (SSE4.1). + inline void pextrw(const Mem& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRW, &dst, &src, &imm8); + } + + //! @brief Packed Horizontal Word Minimum (SSE4.1). + inline void phminposuw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PHMINPOSUW, &dst, &src); + } + //! @brief Packed Horizontal Word Minimum (SSE4.1). + inline void phminposuw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PHMINPOSUW, &dst, &src); + } + + //! @brief Insert Byte (SSE4.1). + inline void pinsrb(const XMMReg& dst, const GPReg& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRB, &dst, &src, &imm8); + } + //! @brief Insert Byte (SSE4.1). + inline void pinsrb(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRB, &dst, &src, &imm8); + } + + //! @brief Insert Dword (SSE4.1). + inline void pinsrd(const XMMReg& dst, const GPReg& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRD, &dst, &src, &imm8); + } + //! @brief Insert Dword (SSE4.1).
+ inline void pinsrd(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRD, &dst, &src, &imm8); + } + + //! @brief Insert Qword (SSE4.1). + inline void pinsrq(const XMMReg& dst, const GPReg& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRQ, &dst, &src, &imm8); + } + //! @brief Insert Qword (SSE4.1). + inline void pinsrq(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRQ, &dst, &src, &imm8); + } + + //! @brief Insert Word (SSE2). + inline void pinsrw(const XMMReg& dst, const GPReg& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRW, &dst, &src, &imm8); + } + //! @brief Insert Word (SSE2). + inline void pinsrw(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRW, &dst, &src, &imm8); + } + + //! @brief Maximum of Packed Unsigned Word Integers (SSE4.1). + inline void pmaxuw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMAXUW, &dst, &src); + } + //! @brief Maximum of Packed Unsigned Word Integers (SSE4.1). + inline void pmaxuw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMAXUW, &dst, &src); + } + + //! @brief Maximum of Packed Signed Byte Integers (SSE4.1). + inline void pmaxsb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMAXSB, &dst, &src); + } + //! @brief Maximum of Packed Signed Byte Integers (SSE4.1). + inline void pmaxsb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMAXSB, &dst, &src); + } + + //! @brief Maximum of Packed Signed Dword Integers (SSE4.1). + inline void pmaxsd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMAXSD, &dst, &src); + } + //! @brief Maximum of Packed Signed Dword Integers (SSE4.1). + inline void pmaxsd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMAXSD, &dst, &src); + } + + //! @brief Maximum of Packed Unsigned Dword Integers (SSE4.1).
+ inline void pmaxud(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMAXUD, &dst, &src); + } + //! @brief Maximum of Packed Unsigned Dword Integers (SSE4.1). + inline void pmaxud(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMAXUD, &dst, &src); + } + + //! @brief Minimum of Packed Signed Byte Integers (SSE4.1). + inline void pminsb(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMINSB, &dst, &src); + } + //! @brief Minimum of Packed Signed Byte Integers (SSE4.1). + inline void pminsb(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMINSB, &dst, &src); + } + + //! @brief Minimum of Packed Unsigned Word Integers (SSE4.1). + inline void pminuw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMINUW, &dst, &src); + } + //! @brief Minimum of Packed Unsigned Word Integers (SSE4.1). + inline void pminuw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMINUW, &dst, &src); + } + + //! @brief Minimum of Packed Unsigned Dword Integers (SSE4.1). + inline void pminud(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMINUD, &dst, &src); + } + //! @brief Minimum of Packed Unsigned Dword Integers (SSE4.1). + inline void pminud(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMINUD, &dst, &src); + } + + //! @brief Minimum of Packed Signed Dword Integers (SSE4.1). + inline void pminsd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMINSD, &dst, &src); + } + //! @brief Minimum of Packed Signed Dword Integers (SSE4.1). + inline void pminsd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMINSD, &dst, &src); + } + + //! @brief Packed Move with Sign Extend (SSE4.1). + inline void pmovsxbw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVSXBW, &dst, &src); + } + //! @brief Packed Move with Sign Extend (SSE4.1). + inline void pmovsxbw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMOVSXBW, &dst, &src); + } + + //!
@brief Packed Move with Sign Extend (SSE4.1). + inline void pmovsxbd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVSXBD, &dst, &src); + } + //! @brief Packed Move with Sign Extend (SSE4.1). + inline void pmovsxbd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMOVSXBD, &dst, &src); + } + + //! @brief Packed Move with Sign Extend (SSE4.1). + inline void pmovsxbq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVSXBQ, &dst, &src); + } + //! @brief Packed Move with Sign Extend (SSE4.1). + inline void pmovsxbq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMOVSXBQ, &dst, &src); + } + + //! @brief Packed Move with Sign Extend (SSE4.1). + inline void pmovsxwd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVSXWD, &dst, &src); + } + //! @brief Packed Move with Sign Extend (SSE4.1). + inline void pmovsxwd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMOVSXWD, &dst, &src); + } + + //! @brief (SSE4.1). + inline void pmovsxwq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVSXWQ, &dst, &src); + } + //! @brief (SSE4.1). + inline void pmovsxwq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMOVSXWQ, &dst, &src); + } + + //! @brief (SSE4.1). + inline void pmovsxdq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVSXDQ, &dst, &src); + } + //! @brief (SSE4.1). + inline void pmovsxdq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMOVSXDQ, &dst, &src); + } + + //! @brief Packed Move with Zero Extend (SSE4.1). + inline void pmovzxbw(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVZXBW, &dst, &src); + } + //! @brief Packed Move with Zero Extend (SSE4.1). + inline void pmovzxbw(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMOVZXBW, &dst, &src); + } + + //! @brief Packed Move with Zero Extend (SSE4.1). 
+ inline void pmovzxbd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVZXBD, &dst, &src); + } + //! @brief Packed Move with Zero Extend (SSE4.1). + inline void pmovzxbd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMOVZXBD, &dst, &src); + } + + //! @brief Packed Move with Zero Extend (SSE4.1). + inline void pmovzxbq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVZXBQ, &dst, &src); + } + //! @brief Packed Move with Zero Extend (SSE4.1). + inline void pmovzxbq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMOVZXBQ, &dst, &src); + } + + //! @brief Packed Move with Zero Extend (SSE4.1). + inline void pmovzxwd(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVZXWD, &dst, &src); + } + //! @brief Packed Move with Zero Extend (SSE4.1). + inline void pmovzxwd(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMOVZXWD, &dst, &src); + } + + //! @brief (SSE4.1). + inline void pmovzxwq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVZXWQ, &dst, &src); + } + //! @brief (SSE4.1). + inline void pmovzxwq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMOVZXWQ, &dst, &src); + } + + //! @brief (SSE4.1). + inline void pmovzxdq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMOVZXDQ, &dst, &src); + } + //! @brief (SSE4.1). + inline void pmovzxdq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMOVZXDQ, &dst, &src); + } + + //! @brief Multiply Packed Signed Dword Integers (SSE4.1). + inline void pmuldq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMULDQ, &dst, &src); + } + //! @brief Multiply Packed Signed Dword Integers (SSE4.1). + inline void pmuldq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMULDQ, &dst, &src); + } + + //! @brief Multiply Packed Signed Integers and Store Low Result (SSE4.1). 
+ inline void pmulld(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PMULLD, &dst, &src); + } + //! @brief Multiply Packed Signed Integers and Store Low Result (SSE4.1). + inline void pmulld(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PMULLD, &dst, &src); + } + + //! @brief Logical Compare (SSE4.1). + inline void ptest(const XMMReg& op1, const XMMReg& op2) + { + _emitInstruction(INST_PTEST, &op1, &op2); + } + //! @brief Logical Compare (SSE4.1). + inline void ptest(const XMMReg& op1, const Mem& op2) + { + _emitInstruction(INST_PTEST, &op1, &op2); + } + + //! Round Packed SP-FP Values @brief (SSE4.1). + inline void roundps(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_ROUNDPS, &dst, &src, &imm8); + } + //! Round Packed SP-FP Values @brief (SSE4.1). + inline void roundps(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_ROUNDPS, &dst, &src, &imm8); + } + + //! @brief Round Scalar SP-FP Values (SSE4.1). + inline void roundss(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_ROUNDSS, &dst, &src, &imm8); + } + //! @brief Round Scalar SP-FP Values (SSE4.1). + inline void roundss(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_ROUNDSS, &dst, &src, &imm8); + } + + //! @brief Round Packed DP-FP Values (SSE4.1). + inline void roundpd(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_ROUNDPD, &dst, &src, &imm8); + } + //! @brief Round Packed DP-FP Values (SSE4.1). + inline void roundpd(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_ROUNDPD, &dst, &src, &imm8); + } + + //! @brief Round Scalar DP-FP Values (SSE4.1). + inline void roundsd(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_ROUNDSD, &dst, &src, &imm8); + } + //! @brief Round Scalar DP-FP Values (SSE4.1). 
+ inline void roundsd(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_ROUNDSD, &dst, &src, &imm8); + } + + // -------------------------------------------------------------------------- + // [SSE4.2] + // -------------------------------------------------------------------------- + + //! @brief Accumulate CRC32 Value (polynomial 0x11EDC6F41) (SSE4.2). + inline void crc32(const GPReg& dst, const GPReg& src) + { + ASMJIT_ASSERT(dst.isRegType(REG_TYPE_GPD) || dst.isRegType(REG_TYPE_GPQ)); + _emitInstruction(INST_CRC32, &dst, &src); + } + //! @brief Accumulate CRC32 Value (polynomial 0x11EDC6F41) (SSE4.2). + inline void crc32(const GPReg& dst, const Mem& src) + { + ASMJIT_ASSERT(dst.isRegType(REG_TYPE_GPD) || dst.isRegType(REG_TYPE_GPQ)); + _emitInstruction(INST_CRC32, &dst, &src); + } + + //! @brief Packed Compare Explicit Length Strings, Return Index (SSE4.2). + inline void pcmpestri(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PCMPESTRI, &dst, &src, &imm8); + } + //! @brief Packed Compare Explicit Length Strings, Return Index (SSE4.2). + inline void pcmpestri(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PCMPESTRI, &dst, &src, &imm8); + } + + //! @brief Packed Compare Explicit Length Strings, Return Mask (SSE4.2). + inline void pcmpestrm(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PCMPESTRM, &dst, &src, &imm8); + } + //! @brief Packed Compare Explicit Length Strings, Return Mask (SSE4.2). + inline void pcmpestrm(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PCMPESTRM, &dst, &src, &imm8); + } + + //! @brief Packed Compare Implicit Length Strings, Return Index (SSE4.2). + inline void pcmpistri(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PCMPISTRI, &dst, &src, &imm8); + } + //! @brief Packed Compare Implicit Length Strings, Return Index (SSE4.2). 
+ inline void pcmpistri(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PCMPISTRI, &dst, &src, &imm8); + } + + //! @brief Packed Compare Implicit Length Strings, Return Mask (SSE4.2). + inline void pcmpistrm(const XMMReg& dst, const XMMReg& src, const Imm& imm8) + { + _emitInstruction(INST_PCMPISTRM, &dst, &src, &imm8); + } + //! @brief Packed Compare Implicit Length Strings, Return Mask (SSE4.2). + inline void pcmpistrm(const XMMReg& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PCMPISTRM, &dst, &src, &imm8); + } + + //! @brief Compare Packed Data for Greater Than (SSE4.2). + inline void pcmpgtq(const XMMReg& dst, const XMMReg& src) + { + _emitInstruction(INST_PCMPGTQ, &dst, &src); + } + //! @brief Compare Packed Data for Greater Than (SSE4.2). + inline void pcmpgtq(const XMMReg& dst, const Mem& src) + { + _emitInstruction(INST_PCMPGTQ, &dst, &src); + } + + //! @brief Return the Count of Number of Bits Set to 1 (SSE4.2). + inline void popcnt(const GPReg& dst, const GPReg& src) + { + ASMJIT_ASSERT(!dst.isGPB()); + ASMJIT_ASSERT(src.getRegType() == dst.getRegType()); + _emitInstruction(INST_POPCNT, &dst, &src); + } + //! @brief Return the Count of Number of Bits Set to 1 (SSE4.2). + inline void popcnt(const GPReg& dst, const Mem& src) + { + ASMJIT_ASSERT(!dst.isGPB()); + _emitInstruction(INST_POPCNT, &dst, &src); + } + + // ------------------------------------------------------------------------- + // [AMD only] + // ------------------------------------------------------------------------- + + //! @brief Prefetch (3dNow - Amd). + //! + //! Loads the entire 64-byte aligned memory sequence containing the + //! specified memory address into the L1 data cache. The position of + //! the specified memory address within the 64-byte cache line is + //! irrelevant. If a cache hit occurs, or if a memory fault is detected, + //! no bus cycle is initiated and the instruction is treated as a NOP. 
+ inline void amd_prefetch(const Mem& mem) + { + _emitInstruction(INST_AMD_PREFETCH, &mem); + } + + //! @brief Prefetch and set cache to modified (3dNow - Amd). + //! + //! The PREFETCHW instruction loads the prefetched line and sets the + //! cache-line state to Modified, in anticipation of subsequent data + //! writes to the line. The PREFETCH instruction, by contrast, typically + //! sets the cache-line state to Exclusive (depending on the hardware + //! implementation). + inline void amd_prefetchw(const Mem& mem) + { + _emitInstruction(INST_AMD_PREFETCHW, &mem); + } + + // ------------------------------------------------------------------------- + // [Intel only] + // ------------------------------------------------------------------------- + + //! @brief Move Data After Swapping Bytes (SSE3 - Intel Atom). + inline void movbe(const GPReg& dst, const Mem& src) + { + ASMJIT_ASSERT(!dst.isGPB()); + _emitInstruction(INST_MOVBE, &dst, &src); + } + + //! @brief Move Data After Swapping Bytes (SSE3 - Intel Atom). + inline void movbe(const Mem& dst, const GPReg& src) + { + ASMJIT_ASSERT(!src.isGPB()); + _emitInstruction(INST_MOVBE, &dst, &src); + } + + // ------------------------------------------------------------------------- + // [Emit Options] + // ------------------------------------------------------------------------- + + //! @brief Assert LOCK# Signal Prefix. + //! + //! This instruction causes the processor's LOCK# signal to be asserted + //! during execution of the accompanying instruction (turns the + //! instruction into an atomic instruction). In a multiprocessor environment, + //! the LOCK# signal insures that the processor has exclusive use of any shared + //! memory while the signal is asserted. + //! + //! The LOCK prefix can be prepended only to the following instructions and + //! to those forms of the instructions that use a memory operand: ADD, ADC, + //! AND, BTC, BTR, BTS, CMPXCHG, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, + //! and XCHG. 
An undefined opcode exception will be generated if the LOCK + //! prefix is used with any other instruction. The XCHG instruction always + //! asserts the LOCK# signal regardless of the presence or absence of the LOCK + //! prefix. + //! + //! @sa @c EMIT_OPTION_LOCK_PREFIX. + inline void lock() + { + _emitOptions |= EMIT_OPTION_LOCK_PREFIX; + } + + //! @brief Force REX prefix to be emitted. + //! + //! This option should be used carefully, because there are unencodable + //! combinations. If you want to access ah, bh, ch or dh registers then you + //! can't emit REX prefix and it will cause an illegal instruction error. + //! + //! @note REX prefix is only valid for X64/AMD64 platform. + //! + //! @sa @c EMIT_OPTION_REX_PREFIX. + inline void rex() + { + _emitOptions |= EMIT_OPTION_REX_PREFIX; + } +}; + +// ============================================================================ +// [AsmJit::Assembler] +// ============================================================================ + +//! @brief Assembler - low level code generation. +//! +//! @c Assembler is the main class in AsmJit for generating low level x86/x64 +//! binary stream. It creates internal buffer where opcodes are stored and +//! contains methods that mimics x86/x64 assembler instructions. Code generation +//! should be safe, because basic type-checks are done by the C++ compiler. It's +//! nearly impossible to create invalid instruction (for example +//! <code>mov [eax], [eax]</code> that will not be detected at compile time. +//! +//! Each call to assembler intrinsics directly emits instruction to internal +//! binary stream. Instruction emitting also contains runtime checks so it's +//! impossible to create instruction that is not valid. +//! +//! @c AsmJit::Assembler contains internal buffer where all emitted +//! instructions are stored. Look at @c AsmJit::Buffer for buffer +//! implementation. To generate and allocate memory for function use +//! 
@c AsmJit::Assembler::make() method that will allocate memory using +//! provided memory manager ( see @c AsmJit::MemoryManager::getGlobal() ) and +//! relocates code to provided address. If you want to create your function +//! manually, you should look at @c AsmJit::VirtualMemory and use +//! @c AsmJit::Assembler::relocCode() method to relocate emitted code into +//! provided memory location. You can also take emitted buffer by @c take() +//! method to do something else with it. If you take buffer, you must free it +//! manually by @c ASMJIT_FREE() macro. +//! +//! @note Always use this class and never use @c AssemblerCore or +//! @c AssemblerIntrinsics classes directly. +//! +//! @section AsmJit_Assembler_CodeGeneration Code Generation +//! +//! To generate code is only needed to create instance of @c AsmJit::Assembler +//! and to use intrinsics. See example how to do that: +//! +//! @code +//! // Use AsmJit namespace. +//! using namespace AsmJit; +//! +//! // Create Assembler instance. +//! Assembler a; +//! +//! // Prolog. +//! a.push(ebp); +//! a.mov(ebp, esp); +//! +//! // Mov 1024 to EAX, EAX is also return value. +//! a.mov(eax, imm(1024)); +//! +//! // Epilog. +//! a.mov(esp, ebp); +//! a.pop(ebp); +//! +//! // Return. +//! a.ret(); +//! @endcode +//! +//! You can see that syntax is very close to Intel one. Only difference is that +//! you are calling functions that emits the binary code for you. All registers +//! are in @c AsmJit namespace, so it's very comfortable to use it (look at +//! first line). There is also used method @c AsmJit::imm() to create an +//! immediate value. Use @c AsmJit::uimm() to create unsigned immediate value. +//! +//! There is also possibility to use memory addresses and immediates. To build +//! memory address use @c ptr(), @c byte_ptr(), @c word_ptr(), @c dword_ptr() +//! or other friend methods. In most cases you needs only @c ptr() method, but +//! there are instructions where you must specify address size, +//! +//! 
for example (a is @c AsmJit::Assembler instance): +//! +//! @code +//! a.mov(ptr(eax), imm(0)); // mov ptr [eax], 0 +//! a.mov(ptr(eax), edx); // mov ptr [eax], edx +//! @endcode +//! +//! But it's also possible to create complex addresses: +//! +//! @code +//! // eax + ecx*x addresses +//! a.mov(ptr(eax, ecx, TIMES_1), imm(0)); // mov ptr [eax + ecx], 0 +//! a.mov(ptr(eax, ecx, TIMES_2), imm(0)); // mov ptr [eax + ecx * 2], 0 +//! a.mov(ptr(eax, ecx, TIMES_4), imm(0)); // mov ptr [eax + ecx * 4], 0 +//! a.mov(ptr(eax, ecx, TIMES_8), imm(0)); // mov ptr [eax + ecx * 8], 0 +//! // eax + ecx*x + disp addresses +//! a.mov(ptr(eax, ecx, TIMES_1, 4), imm(0)); // mov ptr [eax + ecx + 4], 0 +//! a.mov(ptr(eax, ecx, TIMES_2, 8), imm(0)); // mov ptr [eax + ecx * 2 + 8], 0 +//! a.mov(ptr(eax, ecx, TIMES_4, 12), imm(0)); // mov ptr [eax + ecx * 4 + 12], 0 +//! a.mov(ptr(eax, ecx, TIMES_8, 16), imm(0)); // mov ptr [eax + ecx * 8 + 16], 0 +//! @endcode +//! +//! All addresses shown are using @c AsmJit::ptr() to make memory operand. +//! Some assembler instructions (single operand ones) needs to specify memory +//! operand size. For example calling <code>a.inc(ptr(eax))</code> can't be +//! used. @c AsmJit::Assembler::inc(), @c AsmJit::Assembler::dec() and similar +//! instructions can't be serialized without specifying how bytes they are +//! operating on. See next code how assembler works: +//! +//! @code +//! // [byte] address +//! a.inc(byte_ptr(eax)); // inc byte ptr [eax] +//! a.dec(byte_ptr(eax)); // dec byte ptr [eax] +//! // [word] address +//! a.inc(word_ptr(eax)); // inc word ptr [eax] +//! a.dec(word_ptr(eax)); // dec word ptr [eax] +//! // [dword] address +//! a.inc(dword_ptr(eax)); // inc dword ptr [eax] +//! a.dec(dword_ptr(eax)); // dec dword ptr [eax] +//! @endcode +//! +//! @section AsmJit_Assembler_CallingJitCode Calling JIT Code +//! +//! While you are over from emitting instructions, you can make your function +//! using @c AsmJit::Assembler::make() method. 
This method will use memory +//! manager to allocate virtual memory and relocates generated code to it. For +//! memory allocation is used global memory manager by default and memory is +//! freeable, but of course this default behavior can be overriden specifying +//! your memory manager and allocation type. If you want to do with code +//! something else you can always override make() method and do what you want. +//! +//! You can get size of generated code by @c getCodeSize() or @c getOffset() +//! methods. These methods returns you code size (or more precisely current code +//! offset) in bytes. Use takeCode() to take internal buffer (all pointers in +//! @c AsmJit::Assembler instance will be zeroed and current buffer returned) +//! to use it. If you don't take it, @c AsmJit::Assembler destructor will +//! free it automatically. To alloc and run code manually don't use +//! @c malloc()'ed memory, but instead use @c AsmJit::VirtualMemory::alloc() +//! to get memory for executing (specify @c canExecute to @c true) or +//! @c AsmJit::MemoryManager that provides more effective and comfortable way +//! to allocate virtual memory. +//! +//! See next example how to allocate memory where you can execute code created +//! by @c AsmJit::Assembler: +//! +//! @code +//! using namespace AsmJit; +//! +//! Assembler a; +//! +//! // ... your code generation +//! +//! // your function prototype +//! typedef void (*MyFn)(); +//! +//! // make your function +//! MyFn fn = function_cast<MyFn>(a.make()); +//! +//! // call your function +//! fn(); +//! +//! // If you don't need your function again, free it. +//! MemoryManager::getGlobal()->free(fn); +//! @endcode +//! +//! There is also low level alternative how to allocate virtual memory and +//! relocate code to it: +//! +//! @code +//! using namespace AsmJit; +//! +//! Assembler a; +//! // Your code generation ... +//! +//! // Your function prototype. +//! typedef void (*MyFn)(); +//! +//! // Alloc memory for your function. +//! 
MyFn fn = function_cast<MyFn>( +//! MemoryManager::getGlobal()->alloc(a.getCodeSize())); +//! +//! // Relocate the code (will make the function). +//! a.relocCode(fn); +//! +//! // Call the generated function. +//! fn(); +//! +//! // If you don't need your function anymore, it should be freed. +//! MemoryManager::getGlobal()->free(fn); +//! @endcode +//! +//! @note This was a very primitive example of how to call generated code. +//! In real production code you will never alloc and free code for one run, +//! you will usually use generated code many times. +//! +//! @section AsmJit_Assembler_Labels Labels +//! +//! While generating assembler code, you will usually need to create complex +//! code with labels. Labels are fully supported and you can call @c jmp or +//! @c je (and similar) instructions to an initialized or yet uninitialized label. +//! Each label expects to be bound to an offset. To bind a label to a specific +//! offset, use the @c bind() method. +//! +//! See next example that contains complete code that creates simple memory +//! copy function (in DWORD entities). +//! +//! @code +//! // Example: Usage of Label (32-bit code). +//! // +//! // Create simple DWORD memory copy function: +//! // ASMJIT_STDCALL void copy32(uint32_t* dst, const uint32_t* src, sysuint_t count); +//! using namespace AsmJit; +//! +//! // Assembler instance. +//! Assembler a; +//! +//! // Constants. +//! const int arg_offset = 8; // Arguments offset (STDCALL EBP). +//! const int arg_size = 12; // Arguments size. +//! +//! // Labels. +//! Label L_Loop = a.newLabel(); +//! +//! // Prolog. +//! a.push(ebp); +//! a.mov(ebp, esp); +//! a.push(esi); +//! a.push(edi); +//! +//! // Fetch arguments +//! a.mov(esi, dword_ptr(ebp, arg_offset + 0)); // Get dst. +//! a.mov(edi, dword_ptr(ebp, arg_offset + 4)); // Get src. +//! a.mov(ecx, dword_ptr(ebp, arg_offset + 8)); // Get count. +//! +//! // Bind L_Loop label to here. +//! a.bind(L_Loop); +//! +//! // Copy 4 bytes. +//! a.mov(eax, dword_ptr(esi)); +//!
a.mov(dword_ptr(edi), eax); +//! +//! // Increment pointers. +//! a.add(esi, 4); +//! a.add(edi, 4); +//! +//! // Repeat loop until (--ecx != 0). +//! a.dec(ecx); +//! a.jnz(L_Loop); +//! +//! // Epilog. +//! a.pop(edi); +//! a.pop(esi); +//! a.mov(esp, ebp); +//! a.pop(ebp); +//! +//! // Return: STDCALL convention is to pop stack in called function. +//! a.ret(arg_size); +//! @endcode +//! +//! If you need more abstraction for generating assembler code and you want +//! to hide calling conventions between 32-bit and 64-bit operating systems, +//! look at @c Compiler class that is designed for higher level code +//! generation. +//! +//! @section AsmJit_Assembler_AdvancedCodeGeneration Advanced Code Generation +//! +//! This section describes some advanced generation features of @c Assembler +//! class which can be simply overlooked. The first thing that is very likely +//! needed is generic register support. In previous example the named registers +//! were used. AsmJit contains functions which can convert register index into +//! operand and back. +//! +//! Let's define function which can be used to generate some abstract code: +//! +//! @code +//! // Simple function that generates dword copy. +//! void genCopyDWord( +//! Assembler& a, +//! const GPReg& dst, const GPReg& src, const GPReg& tmp) +//! { +//! a.mov(tmp, dword_ptr(src)); +//! a.mov(dword_ptr(dst), tmp); +//! } +//! @endcode +//! +//! This function can be called like <code>genCopyDWord(a, edi, esi, ebx)</code> +//! or by using existing @ref GPReg instances. This abstraction allows to join +//! more code sections together without rewriting each to use specific registers. +//! You need to take care only about implicit registers which may be used by +//! several instructions (like mul, imul, div, idiv, shifting, etc...). +//! +//! Next, more advanced, but often needed technique is that you can build your +//! own registers allocator. X86 architecture contains 8 general purpose registers, +//!
8 MMX (MM) registers and 8 SSE (XMM) registers. The X64 (AMD64) architecture +//! extends count of general purpose registers and SSE2 registers to 16. Use the +//! @c REG_NUM_BASE constant to get count of GP or XMM registers or @c REG_NUM_GP, +//! @c REG_NUM_MM and @c REG_NUM_XMM constants individually. +//! +//! To build register from index (value from 0 inclusive to REG_NUM_XXX +//! exclusive) use @ref gpd(), @ref gpq() or @ref gpn() functions. To create +//! a 8 or 16-bit register use @ref gpw(), @ref gpb_lo() or @ref gpb_hi(). +//! To create other registers there are similar methods @ref mm(), @ref xmm() and +//! @ref st(). +//! +//! So our function call to genCopyDWord can be also used like this: +//! +//! @code +//! genCopyDWord(a, gpd(REG_INDEX_EDI), gpd(REG_INDEX_ESI), gpd(REG_INDEX_EBX)); +//! @endcode +//! +//! REG_INDEX_XXX are constants defined by @ref REG_INDEX enum. You can use your +//! own register allocator (or register slot manager) to alloc / free registers +//! so REG_INDEX_XXX values can be replaced by your variables (0 to REG_NUM_XXX-1). +//! +//! @sa @c AssemblerCore, @c AssemblerIntrinsics, @c Operand, @c Compiler. +struct ASMJIT_API Assembler : public AssemblerIntrinsics +{ + Assembler(CodeGenerator* codeGenerator = NULL) ASMJIT_NOTHROW; + virtual ~Assembler() ASMJIT_NOTHROW; +}; + +//! @} + +} // AsmJit namespace + +// [Api-End] +#include "ApiEnd.h" + +// [Guard] +#endif // _ASMJIT_ASSEMBLERX86X64_H diff --git a/lib/AsmJit/Build.h b/lib/AsmJit/Build.h new file mode 100644 index 0000000..1dc000e --- /dev/null +++ b/lib/AsmJit/Build.h @@ -0,0 +1,304 @@ +// AsmJit - Complete JIT Assembler for C++ Language. 
+ +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Guard] +#ifndef _ASMJIT_BUILD_H +#define _ASMJIT_BUILD_H + +// [Include] +#include "Config.h" + +// Here should be optional include files that are needed to successfully +// use macros defined here. Remember, AsmJit uses only AsmJit namespace +// and all macros are used within it.
+#include <stdio.h> +#include <stdlib.h> + +// ---------------------------------------------------------------------------- +// [AsmJit - OS] +// ---------------------------------------------------------------------------- + +#if defined(WINDOWS) || defined(__WINDOWS__) || defined(_WIN32) || defined(_WIN64) +# define ASMJIT_WINDOWS +#elif defined(__linux__) || defined(__unix__) || \ + defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || \ + defined(__DragonFly__) || defined(__BSD__) || defined(__FREEBSD__) || \ + defined(__APPLE__) +# define ASMJIT_POSIX +#else +# warning "AsmJit - Can't match operating system, using ASMJIT_POSIX" +# define ASMJIT_POSIX +#endif + +// ---------------------------------------------------------------------------- +// [AsmJit - Architecture] +// ---------------------------------------------------------------------------- + +// define it only if it's not defined. In some systems we can +// use -D command in compiler to bypass this autodetection. +#if !defined(ASMJIT_X86) && !defined(ASMJIT_X64) +# if defined(__x86_64__) || defined(__LP64) || defined(__IA64__) || \ + defined(_M_X64) || defined(_WIN64) +# define ASMJIT_X64 // x86-64 +# else +// _M_IX86, __INTEL__, __i386__ +# define ASMJIT_X86 +# endif +#endif + +// ---------------------------------------------------------------------------- +// [AsmJit - API] +// ---------------------------------------------------------------------------- + +// Hide AsmJit symbols that we don't want to export (AssemblerIntrinsics class for example). +#if !defined(ASMJIT_HIDDEN) +# if defined(__GNUC__) && __GNUC__ >= 4 +# define ASMJIT_HIDDEN __attribute__((visibility("hidden"))) +# endif // __GNUC__ && __GNUC__ >= 4 +#endif // ASMJIT_HIDDEN + +// Make AsmJit as shared library by default. 
+#if !defined(ASMJIT_API) +# if defined(ASMJIT_WINDOWS) +# if defined(__GNUC__) +# if defined(AsmJit_EXPORTS) +# define ASMJIT_API __attribute__((dllexport)) +# else +# define ASMJIT_API __attribute__((dllimport)) +# endif // AsmJit_EXPORTS +# else +# if defined(AsmJit_EXPORTS) +# define ASMJIT_API __declspec(dllexport) +# else +# define ASMJIT_API __declspec(dllimport) +# endif // AsmJit_EXPORTS +# endif // __GNUC__ +# else +# if defined(__GNUC__) +# if __GNUC__ >= 4 +# define ASMJIT_API __attribute__((visibility("default"))) +# define ASMJIT_VAR extern ASMJIT_API +# endif // __GNUC__ >= 4 +# endif // __GNUC__ +# endif +#endif // ASMJIT_API + +#if defined(ASMJIT_API) +# define ASMJIT_VAR extern ASMJIT_API +#else +# define ASMJIT_API +# define ASMJIT_VAR +#endif // ASMJIT_API + +// If not detected, fallback to nothing. +#if !defined(ASMJIT_HIDDEN) +# define ASMJIT_HIDDEN +#endif // ASMJIT_HIDDEN + +#if !defined(ASMJIT_NOTHROW) +#define ASMJIT_NOTHROW throw() +#endif // ASMJIT_NOTHROW + +// [AsmJit - Memory Management] +#if !defined(ASMJIT_MALLOC) +# define ASMJIT_MALLOC ::malloc +#endif // ASMJIT_MALLOC + +#if !defined(ASMJIT_REALLOC) +# define ASMJIT_REALLOC ::realloc +#endif // ASMJIT_REALLOC + +#if !defined(ASMJIT_FREE) +# define ASMJIT_FREE ::free +#endif // ASMJIT_FREE + +// ---------------------------------------------------------------------------- +// [AsmJit - Calling Conventions] +// ---------------------------------------------------------------------------- + +#if defined(ASMJIT_X86) +# if defined(__GNUC__) +# define ASMJIT_REGPARM_1 __attribute__((regparm(1))) +# define ASMJIT_REGPARM_2 __attribute__((regparm(2))) +# define ASMJIT_REGPARM_3 __attribute__((regparm(3))) +# define ASMJIT_FASTCALL __attribute__((fastcall)) +# define ASMJIT_STDCALL __attribute__((stdcall)) +# define ASMJIT_CDECL __attribute__((cdecl)) +# else +# define ASMJIT_FASTCALL __fastcall +# define ASMJIT_STDCALL __stdcall +# define ASMJIT_CDECL __cdecl +# endif +#else +# define 
ASMJIT_FASTCALL +# define ASMJIT_STDCALL +# define ASMJIT_CDECL +#endif // ASMJIT_X86 + +#if !defined(ASMJIT_UNUSED) +# define ASMJIT_UNUSED(var) ((void)var) +#endif // ASMJIT_UNUSED + +#if !defined(ASMJIT_NOP) +# define ASMJIT_NOP() ((void)0) +#endif // ASMJIT_NOP + +// [AsmJit - C++ Compiler Support] +#define ASMJIT_TYPE_TO_TYPE(type) type +#define ASMJIT_HAS_STANDARD_DEFINE_OPTIONS +#define ASMJIT_HAS_PARTIAL_TEMPLATE_SPECIALIZATION + +// Support for VC6 +#if defined(_MSC_VER) && (_MSC_VER < 1400) +#undef ASMJIT_TYPE_TO_TYPE +namespace AsmJit { + template<typename T> + struct _Type2Type { typedef T Type; }; +} +#define ASMJIT_TYPE_TO_TYPE(T) _Type2Type<T>::Type + +#undef ASMJIT_HAS_STANDARD_DEFINE_OPTIONS +#undef ASMJIT_HAS_PARTIAL_TEMPLATE_SPECIALIZATION + +#endif + +// ---------------------------------------------------------------------------- +// [AsmJit - Types] +// ---------------------------------------------------------------------------- + +#if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER >= 1600) + +// Use <stdint.h> +#include <stdint.h> + +#else + +// Use typedefs. 
+#if defined(_MSC_VER) +#if (_MSC_VER < 1300) +typedef char int8_t; +typedef short int16_t; +typedef int int32_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +typedef __int8 int8_t; +typedef __int16 int16_t; +typedef __int32 int32_t; +typedef __int64 int64_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; +#endif +#endif // _MSC_VER +#endif // STDINT.H + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +#if defined(ASMJIT_X86) +typedef int32_t sysint_t; +typedef uint32_t sysuint_t; +#else +typedef int64_t sysint_t; +typedef uint64_t sysuint_t; +#endif + +#if defined(_MSC_VER) +# define ASMJIT_INT64_C(num) num##i64 +# define ASMJIT_UINT64_C(num) num##ui64 +#else +# define ASMJIT_INT64_C(num) num##LL +# define ASMJIT_UINT64_C(num) num##ULL +#endif + +// ---------------------------------------------------------------------------- +// [AsmJit - C++ Macros] +// ---------------------------------------------------------------------------- + +#define ASMJIT_ARRAY_SIZE(A) (sizeof(A) / sizeof(*A)) + +#define ASMJIT_DISABLE_COPY(__type__) \ +private: \ + inline __type__(const __type__& other); \ + inline __type__& operator=(const __type__& other); + +// ---------------------------------------------------------------------------- +// [AsmJit - Debug] +// ---------------------------------------------------------------------------- + +// If ASMJIT_DEBUG and ASMJIT_NO_DEBUG is not defined then ASMJIT_DEBUG will be +// detected using the compiler specific macros. This enables to set the build +// type using IDE. 
+#if !defined(ASMJIT_DEBUG) && !defined(ASMJIT_NO_DEBUG) + +#if defined(_DEBUG) +#define ASMJIT_DEBUG +#endif // _DEBUG + +#endif // !ASMJIT_DEBUG && !ASMJIT_NO_DEBUG + +// ---------------------------------------------------------------------------- +// [AsmJit - Assert] +// ---------------------------------------------------------------------------- + +namespace AsmJit { +ASMJIT_API void assertionFailure(const char* file, int line, const char* exp); +} // AsmJit namespace + +#if defined(ASMJIT_DEBUG) +# if !defined(ASMJIT_ASSERT) +# define ASMJIT_ASSERT(exp) do { if (!(exp)) ::AsmJit::assertionFailure(__FILE__, __LINE__, #exp); } while(0) +# endif +#else +# if !defined(ASMJIT_ASSERT) +# define ASMJIT_ASSERT(exp) ASMJIT_NOP() +# endif +#endif // DEBUG + +// GCC warnings fix: I can't understand why GCC has no interface to push/pop +// specific warnings. +// #if defined(__GNUC__) +// # if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 402001 +// # pragma GCC diagnostic ignored "-w" +// # endif +// #endif // __GNUC__ + +// ---------------------------------------------------------------------------- +// [AsmJit - OS Support] +// ---------------------------------------------------------------------------- + +#if defined(ASMJIT_WINDOWS) +#include <Windows.h> +#endif // ASMJIT_WINDOWS + +// [Guard] +#endif // _ASMJIT_BUILD_H diff --git a/lib/AsmJit/CMakeLists.txt b/lib/AsmJit/CMakeLists.txt new file mode 100644 index 0000000..2056cdf --- /dev/null +++ b/lib/AsmJit/CMakeLists.txt @@ -0,0 +1,61 @@ +# Reguire minimum version of CMake +CMake_Minimum_Required(VERSION 2.6) + +# AsmJit C++ sources +Set(ASMJIT_SOURCES + Assembler.cpp + AssemblerX86X64.cpp + CodeGenerator.cpp + Compiler.cpp + CompilerX86X64.cpp + CpuInfo.cpp + Defs.cpp + DefsX86X64.cpp + Logger.cpp + MemoryManager.cpp + Operand.cpp + OperandX86X64.cpp + Platform.cpp + Util.cpp +) + +# AsmJit C++ headers +Set(ASMJIT_HEADERS + ApiBegin.h + ApiEnd.h + AsmJit.h + Assembler.h + AssemblerX86X64.h + 
Build.h + CodeGenerator.h + Compiler.h + CompilerX86X64.h + Config.h + CpuInfo.h + Defs.h + DefsX86X64.h + Logger.h + MemoryManager.h + Operand.h + OperandX86X64.h + Platform.h + Util.h + Util_p.h +) + +# pthread library is needed for non-windows OSes. +If(NOT WIN32) + Link_Libraries(pthread) +EndIf(NOT WIN32) + +# Build-Type. +If(${CMAKE_BUILD_TYPE}) + If(${CMAKE_BUILD_TYPE} MATCHES "Debug") + Add_Definitions(-DASMJIT_DEBUG) + Else() + Add_Definitions(-DASMJIT_NO_DEBUG) + EndIf() +EndIf() + +Add_Library(AsmJit SHARED ${ASMJIT_SOURCES} ${ASMJIT_HEADERS}) + diff --git a/lib/AsmJit/CodeGenerator.cpp b/lib/AsmJit/CodeGenerator.cpp new file mode 100644 index 0000000..180c918 --- /dev/null +++ b/lib/AsmJit/CodeGenerator.cpp @@ -0,0 +1,101 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +// [Dependencies] +#include "Assembler.h" +#include "CodeGenerator.h" +#include "Defs.h" +#include "MemoryManager.h" + +namespace AsmJit { + +// ============================================================================ +// [AsmJit::CodeGenerator] +// ============================================================================ + +CodeGenerator::CodeGenerator() +{ +} + +CodeGenerator::~CodeGenerator() +{ +} + +CodeGenerator* CodeGenerator::getGlobal() +{ + static JitCodeGenerator global; + return &global; +} + +// ============================================================================ +// [AsmJit::JitCodeGenerator] +// ============================================================================ + +JitCodeGenerator::JitCodeGenerator() : + _memoryManager(NULL), + _allocType(MEMORY_ALLOC_FREEABLE) +{ +} + +JitCodeGenerator::~JitCodeGenerator() +{ +} + +uint32_t JitCodeGenerator::generate(void** dest, Assembler* assembler) +{ + // Disallow empty code generation. + sysuint_t codeSize = assembler->getCodeSize(); + if (codeSize == 0) + { + *dest = NULL; + return AsmJit::ERROR_NO_FUNCTION; + } + + // Switch to global memory manager if not provided. + MemoryManager* memmgr = getMemoryManager(); + if (memmgr == NULL) memmgr = MemoryManager::getGlobal(); + + void* p = memmgr->alloc(codeSize, getAllocType()); + if (p == NULL) + { + *dest = NULL; + return ERROR_NO_VIRTUAL_MEMORY; + } + + // Relocate the code. + sysuint_t relocatedSize = assembler->relocCode(p); + + // Return unused memory to mamory-manager. + if (relocatedSize < codeSize) + { + memmgr->shrink(p, relocatedSize); + } + + // Return the code. + *dest = p; + return ERROR_NONE; +} + +} // AsmJit namespace diff --git a/lib/AsmJit/CodeGenerator.h b/lib/AsmJit/CodeGenerator.h new file mode 100644 index 0000000..fc0f5a6 --- /dev/null +++ b/lib/AsmJit/CodeGenerator.h @@ -0,0 +1,146 @@ +// AsmJit - Complete JIT Assembler for C++ Language. 
// AsmJit - Complete JIT Assembler for C++ Language.

// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.

// [Guard]
#ifndef _ASMJIT_CODEGENERATOR_H
#define _ASMJIT_CODEGENERATOR_H

// [Dependencies]
#include "Build.h"

namespace AsmJit {

// ============================================================================
// [Forward Declarations]
// ============================================================================

struct Assembler;
struct JitCodeGenerator;
struct MemoryManager;

// ============================================================================
// [AsmJit::CodeGenerator]
// ============================================================================

//! @brief Code generator is core class for changing behavior of code generated
//! by @c Assembler or @c Compiler.
//!
//! It is an abstract base: derived classes implement @c generate() to decide
//! where and how the assembled machine code is placed (JIT memory, a remote
//! process, a shared library, ...).
struct ASMJIT_API CodeGenerator
{
  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! @brief Create a @c CodeGenerator instance.
  CodeGenerator();
  //! @brief Destroy the @c CodeGenerator instance.
  virtual ~CodeGenerator();

  // --------------------------------------------------------------------------
  // [Interface]
  // --------------------------------------------------------------------------

  //! @brief Allocate memory for code generated in @a assembler and reloc it
  //! to target location.
  //!
  //! This method is universal allowing any pre-process / post-process work
  //! with code generated by @c Assembler or @c Compiler. Because @c Compiler
  //! always uses @c Assembler it's allowed to access only the @c Assembler
  //! instance.
  //!
  //! This method is always last step when using code generation. You can use
  //! it to allocate memory for JIT code, saving code to remote process or a
  //! shared library.
  //!
  //! @return Error value, see @c ERROR_CODE.
  virtual uint32_t generate(void** dest, Assembler* assembler) = 0;

  // --------------------------------------------------------------------------
  // [Statics]
  // --------------------------------------------------------------------------

  //! @brief Get the process-wide default generator (a @c JitCodeGenerator).
  static CodeGenerator* getGlobal();

private:
  ASMJIT_DISABLE_COPY(CodeGenerator)
};

// ============================================================================
// [AsmJit::JitCodeGenerator]
// ============================================================================

//! @brief Default code generator: places code into executable memory obtained
//! from a @c MemoryManager (the global one when none is set).
struct JitCodeGenerator : public CodeGenerator
{
  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! @brief Create a @c JitCodeGenerator instance.
  JitCodeGenerator();
  //! @brief Destroy the @c JitCodeGenerator instance.
  virtual ~JitCodeGenerator();

  // --------------------------------------------------------------------------
  // [Memory Manager and Alloc Type]
  // --------------------------------------------------------------------------

  // Note: These members can be ignored by all derived classes. They are here
  // only to provide default implementation. All other implementations (remote
  // code patching or making dynamic loadable libraries/executables) ignore
  // members accessed by these accessors.

  //! @brief Get the @c MemoryManager instance (NULL means "use the global one").
  inline MemoryManager* getMemoryManager() const { return _memoryManager; }
  //! @brief Set the @c MemoryManager instance.
  inline void setMemoryManager(MemoryManager* memoryManager) { _memoryManager = memoryManager; }

  //! @brief Get the type of allocation (see @c MEMORY_ALLOC_* constants).
  inline uint32_t getAllocType() const { return _allocType; }
  //! @brief Set the type of allocation.
  inline void setAllocType(uint32_t allocType) { _allocType = allocType; }

  // --------------------------------------------------------------------------
  // [Interface]
  // --------------------------------------------------------------------------

  virtual uint32_t generate(void** dest, Assembler* assembler);

  // --------------------------------------------------------------------------
  // [Members]
  // --------------------------------------------------------------------------

protected:
  //! @brief Memory manager.
  MemoryManager* _memoryManager;
  //! @brief Type of allocation.
  uint32_t _allocType;

private:
  ASMJIT_DISABLE_COPY(JitCodeGenerator)
};

} // AsmJit namespace

// [Guard]
#endif // _ASMJIT_CODEGENERATOR_H
+ +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// We are using sprintf() here. 
+#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS) +#define _CRT_SECURE_NO_WARNINGS +#endif // _MSC_VER + +// [Dependencies] +#include "Assembler.h" +#include "Compiler.h" +#include "CpuInfo.h" +#include "Logger.h" +#include "Util.h" + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +// ============================================================================ +// [AsmJit::Emittable] +// ============================================================================ + +Emittable::Emittable(Compiler* c, uint32_t type) ASMJIT_NOTHROW : + _compiler(c), + _next(NULL), + _prev(NULL), + _comment(NULL), + _type((uint8_t)type), + _translated(false), + _reserved0(0), + _reserved1(0), + _offset(INVALID_VALUE) +{ +} + +Emittable::~Emittable() ASMJIT_NOTHROW +{ +} + +void Emittable::prepare(CompilerContext& cc) ASMJIT_NOTHROW +{ + _offset = cc._currentOffset; +} + +Emittable* Emittable::translate(CompilerContext& cc) ASMJIT_NOTHROW +{ + return translated(); +} + +void Emittable::emit(Assembler& a) ASMJIT_NOTHROW +{ +} + +void Emittable::post(Assembler& a) ASMJIT_NOTHROW +{ +} + +int Emittable::getMaxSize() const ASMJIT_NOTHROW +{ + // Default maximum size is -1 which means that it's not known. + return -1; +} + +bool Emittable::_tryUnuseVar(VarData* v) ASMJIT_NOTHROW +{ + return false; +} + +void Emittable::setComment(const char* str) ASMJIT_NOTHROW +{ + _comment = _compiler->getZone().zstrdup(str); +} + +void Emittable::setCommentF(const char* fmt, ...) ASMJIT_NOTHROW +{ + // I'm really not expecting larger inline comments:) + char buf[256]; + + va_list ap; + va_start(ap, fmt); + vsnprintf(buf, 255, fmt, ap); + va_end(ap); + + // I don't know if vsnprintf can produce non-null terminated string, in case + // it can, we terminate it here. 
+ buf[255] = '\0'; + + setComment(buf); +} + +// ============================================================================ +// [AsmJit::EDummy] +// ============================================================================ + +EDummy::EDummy(Compiler* c) ASMJIT_NOTHROW : + Emittable(c, EMITTABLE_DUMMY) +{ +} + +EDummy::~EDummy() ASMJIT_NOTHROW +{ +} + +int EDummy::getMaxSize() const ASMJIT_NOTHROW +{ + return 0; +} + +// ============================================================================ +// [AsmJit::EFunctionEnd] +// ============================================================================ + +EFunctionEnd::EFunctionEnd(Compiler* c) ASMJIT_NOTHROW : + EDummy(c) +{ + _type = EMITTABLE_FUNCTION_END; +} + +EFunctionEnd::~EFunctionEnd() ASMJIT_NOTHROW +{ +} + +Emittable* EFunctionEnd::translate(CompilerContext& cc) ASMJIT_NOTHROW +{ + _translated = true; + return NULL; +} + +// ============================================================================ +// [AsmJit::EComment] +// ============================================================================ + +EComment::EComment(Compiler* c, const char* str) ASMJIT_NOTHROW : + Emittable(c, EMITTABLE_COMMENT) +{ + setComment(str); +} + +EComment::~EComment() ASMJIT_NOTHROW +{ +} + +void EComment::emit(Assembler& a) ASMJIT_NOTHROW +{ + if (a.getLogger()) + { + a.getLogger()->logString(getComment()); + } +} + +int EComment::getMaxSize() const ASMJIT_NOTHROW +{ + return 0; +} + +// ============================================================================ +// [AsmJit::EData] +// ============================================================================ + +EData::EData(Compiler* c, const void* data, sysuint_t length) ASMJIT_NOTHROW : + Emittable(c, EMITTABLE_EMBEDDED_DATA) +{ + _length = length; + memcpy(_data, data, length); +} + +EData::~EData() ASMJIT_NOTHROW +{ +} + +void EData::emit(Assembler& a) ASMJIT_NOTHROW +{ + a.embed(_data, _length); +} + +int EData::getMaxSize() const ASMJIT_NOTHROW +{ + 
return (int)_length;; +} + +// ============================================================================ +// [AsmJit::EAlign] +// ============================================================================ + +EAlign::EAlign(Compiler* c, uint32_t size) ASMJIT_NOTHROW : + Emittable(c, EMITTABLE_ALIGN), _size(size) +{ +} + +EAlign::~EAlign() ASMJIT_NOTHROW +{ +} + +void EAlign::emit(Assembler& a) ASMJIT_NOTHROW +{ + a.align(_size); +} + +int EAlign::getMaxSize() const ASMJIT_NOTHROW +{ + return (_size > 0) ? (int)_size - 1 : 0; +} + +// ============================================================================ +// [AsmJit::ETarget] +// ============================================================================ + +ETarget::ETarget(Compiler* c, const Label& label) ASMJIT_NOTHROW : + Emittable(c, EMITTABLE_TARGET), + _label(label), + _from(NULL), + _state(NULL), + _jumpsCount(0) +{ +} + +ETarget::~ETarget() ASMJIT_NOTHROW +{ +} + +void ETarget::prepare(CompilerContext& cc) ASMJIT_NOTHROW +{ + _offset = cc._currentOffset++; +} + +Emittable* ETarget::translate(CompilerContext& cc) ASMJIT_NOTHROW +{ + // If this ETarget was already translated, it's needed to change the current + // state and return NULL to tell CompilerContext to process next untranslated + // emittable. + if (_translated) + { + cc._restoreState(_state); + return NULL; + } + + if (cc._unrecheable) + { + cc._unrecheable = 0; + + // Assign state to the compiler context. + ASMJIT_ASSERT(_state != NULL); + cc._assignState(_state); + } + else + { + _state = cc._saveState(); + } + + return translated(); +} + +void ETarget::emit(Assembler& a) ASMJIT_NOTHROW +{ + a.bind(_label); +} + +int ETarget::getMaxSize() const ASMJIT_NOTHROW +{ + return 0; +} + +} // AsmJit namespace diff --git a/lib/AsmJit/Compiler.h b/lib/AsmJit/Compiler.h new file mode 100644 index 0000000..7d44a43 --- /dev/null +++ b/lib/AsmJit/Compiler.h @@ -0,0 +1,863 @@ +// AsmJit - Complete JIT Assembler for C++ Language. 
+ +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +// [Guard] +#ifndef _ASMJIT_COMPILER_H +#define _ASMJIT_COMPILER_H + +// [Dependencies] +#include "Build.h" +#include "Defs.h" +#include "Operand.h" +#include "Util.h" + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +// ============================================================================ +// [Forward Declarations] +// ============================================================================ + +struct Assembler; +struct Compiler; +struct CompilerContext; +struct CompilerCore; +struct CompilerIntrinsics; + +struct FunctionDefinition; + +struct ForwardJumpData; + +struct VarData; +struct VarAllocRecord; +struct StateData; + +struct Emittable; +struct EAlign; +struct ECall; +struct EComment; +struct EData; +struct EEpilog; +struct EFunction; +struct EFunctionEnd; +struct EInstruction; +struct EJmp; +struct EProlog; +struct ERet; + +// ============================================================================ +// [AsmJit::TypeToId] +// ============================================================================ + +#if !defined(ASMJIT_NODOC) + +#if defined(ASMJIT_HAS_PARTIAL_TEMPLATE_SPECIALIZATION) + +template<typename T> +struct TypeToId +{ +#if defined(ASMJIT_NODOC) + enum { Id = INVALID_VALUE }; +#endif // ASMJIT_NODOC +}; + +template<typename T> +struct TypeToId<T*> { enum { Id = VARIABLE_TYPE_INTPTR }; }; + +#else + +// Same trict is used in Qt, Boost, Fog and all other libraries that need +// something similar. +// +// It's easy. It's needed to use sizeof() to determine the size +// of return value of this function. If size will be sizeof(char) +// (this is our type) then type is pointer, otherwise it's not. +template<typename T> +char TypeToId_NoPtiHelper(T*(*)()); +// And specialization. +void* TypeToId_NoPtiHelper(...); + +template<typename T> +struct TypeToId +{ + // TypeInfo constants + enum + { + // This is the hackery result. + Id = (sizeof(char) == sizeof( TypeToId_NoPtiHelper((T(*)())0) ) + ? 
VARIABLE_TYPE_INTPTR + : INVALID_VALUE) + }; +}; + +#endif // ASMJIT_HAS_PARTIAL_TEMPLATE_SPECIALIZATION + +#define ASMJIT_DECLARE_TYPE_AS_ID(__T__, __Id__) \ + template<> \ + struct TypeToId<__T__> { enum { Id = __Id__ }; } + +// Declare void type and alternative. +struct Void {}; +ASMJIT_DECLARE_TYPE_AS_ID(void, INVALID_VALUE); +ASMJIT_DECLARE_TYPE_AS_ID(Void, INVALID_VALUE); + +#endif // ASMJIT_NODOC + +// ============================================================================ +// [AsmJit::Function Builder] +// ============================================================================ + +struct FunctionDefinition +{ + //! @brief Get function arguments IDs. + inline const uint32_t* getArguments() const + { + return _arguments; + } + + //! @brief Get function arguments count. + inline uint32_t getArgumentsCount() const + { + return _argumentsCount; + } + + inline uint32_t getArgument(uint32_t id) const + { + ASMJIT_ASSERT(id < _argumentsCount); + return _arguments[id]; + } + + //! @brief Get function return value. + inline uint32_t getReturnValue() const + { + return _returnValue; + } + +protected: + inline void _setDefinition(const uint32_t* arguments, uint32_t argumentsCount, uint32_t returnValue) + { + _arguments = arguments; + _argumentsCount = argumentsCount; + _returnValue = returnValue; + } + + const uint32_t* _arguments; + uint32_t _argumentsCount; + uint32_t _returnValue; +}; + +//! @brief Custom function builder for up to 32 function arguments. 
struct FunctionBuilderX : public FunctionDefinition
{
  //! @brief Create an empty definition: no arguments and INVALID_VALUE as the
  //! return type id (i.e. "no return value").
  inline FunctionBuilderX()
  {
    _setDefinition(_argumentsData, 0, INVALID_VALUE);
  }

  //! @brief Append one argument of C++ type @a T (mapped via @c TypeToId).
  template<typename T>
  inline void addArgument()
  {
    addArgumentRaw(TypeToId<ASMJIT_TYPE_TO_TYPE(T)>::Id);
  }

  //! @brief Overwrite existing argument @a id with C++ type @a T.
  template<typename T>
  inline void setArgument(uint32_t id)
  {
    setArgumentRaw(id, TypeToId<ASMJIT_TYPE_TO_TYPE(T)>::Id);
  }

  //! @brief Set the return value to C++ type @a T.
  template<typename T>
  inline void setReturnValue()
  {
    setReturnValueRaw(TypeToId<ASMJIT_TYPE_TO_TYPE(T)>::Id);
  }

  //! @brief Append one argument by raw VARIABLE_TYPE id.
  //! Asserts when the FUNC_MAX_ARGS capacity would be exceeded.
  inline void addArgumentRaw(uint32_t type)
  {
    ASMJIT_ASSERT(_argumentsCount < FUNC_MAX_ARGS);
    _argumentsData[_argumentsCount++] = type;
  }

  //! @brief Overwrite argument @a id with a raw VARIABLE_TYPE id.
  //! @a id must index an already-added argument.
  inline void setArgumentRaw(uint32_t id, uint32_t type)
  {
    ASMJIT_ASSERT(id < _argumentsCount);
    _argumentsData[id] = type;
  }

  //! @brief Set the return value by raw VARIABLE_TYPE id.
  inline void setReturnValueRaw(uint32_t returnValue)
  {
    _returnValue = returnValue;
  }

protected:
  // Fixed-capacity backing store; the base class's _arguments pointer is
  // wired to this array by the constructor.
  uint32_t _argumentsData[FUNC_MAX_ARGS];
};

//! @brief Class used to build function without arguments.
template<typename RET>
struct FunctionBuilder0 : public FunctionDefinition
{
  inline FunctionBuilder0()
  {
    _setDefinition(NULL, 0, TypeToId<RET>::Id);
  }
};

//! @brief Class used to build function with 1 argument.
template<typename RET, typename P0>
struct FunctionBuilder1 : public FunctionDefinition
{
  inline FunctionBuilder1()
  {
    // static const: one shared table per template instantiation — the base
    // class only stores a pointer to it.
    static const uint32_t args[] =
    {
      TypeToId<P0>::Id
    };
    _setDefinition(args, ASMJIT_ARRAY_SIZE(args), TypeToId<RET>::Id);
  }
};

//! @brief Class used to build function with 2 arguments.
template<typename RET, typename P0, typename P1>
struct FunctionBuilder2 : public FunctionDefinition
{
  inline FunctionBuilder2()
  {
    static const uint32_t args[] =
    {
      TypeToId<P0>::Id,
      TypeToId<P1>::Id
    };
    _setDefinition(args, ASMJIT_ARRAY_SIZE(args), TypeToId<RET>::Id);
  }
};

//! @brief Class used to build function with 3 arguments.
+template<typename RET, typename P0, typename P1, typename P2> +struct FunctionBuilder3 : public FunctionDefinition +{ + inline FunctionBuilder3() + { + static const uint32_t args[] = + { + TypeToId<P0>::Id, + TypeToId<P1>::Id, + TypeToId<P2>::Id + }; + _setDefinition(args, ASMJIT_ARRAY_SIZE(args), TypeToId<RET>::Id); + } +}; + +//! @brief Class used to build function with 4 arguments. +template<typename RET, typename P0, typename P1, typename P2, typename P3> +struct FunctionBuilder4 : public FunctionDefinition +{ + inline FunctionBuilder4() + { + static const uint32_t args[] = + { + TypeToId<P0>::Id, + TypeToId<P1>::Id, + TypeToId<P2>::Id, + TypeToId<P3>::Id + }; + _setDefinition(args, ASMJIT_ARRAY_SIZE(args), TypeToId<RET>::Id); + } +}; + +//! @brief Class used to build function with 5 arguments. +template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4> +struct FunctionBuilder5 : public FunctionDefinition +{ + inline FunctionBuilder5() + { + static const uint32_t args[] = + { + TypeToId<P0>::Id, + TypeToId<P1>::Id, + TypeToId<P2>::Id, + TypeToId<P3>::Id, + TypeToId<P4>::Id + }; + _setDefinition(args, ASMJIT_ARRAY_SIZE(args), TypeToId<RET>::Id); + } +}; + +//! @brief Class used to build function with 6 arguments. +template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5> +struct FunctionBuilder6 : public FunctionDefinition +{ + inline FunctionBuilder6() + { + static const uint32_t args[] = + { + TypeToId<P0>::Id, + TypeToId<P1>::Id, + TypeToId<P2>::Id, + TypeToId<P3>::Id, + TypeToId<P4>::Id, + TypeToId<P5>::Id + }; + _setDefinition(args, ASMJIT_ARRAY_SIZE(args), TypeToId<RET>::Id); + } +}; + +//! @brief Class used to build function with 7 arguments. 
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6> +struct FunctionBuilder7 : public FunctionDefinition +{ + inline FunctionBuilder7() + { + static const uint32_t args[] = + { + TypeToId<P0>::Id, + TypeToId<P1>::Id, + TypeToId<P2>::Id, + TypeToId<P3>::Id, + TypeToId<P4>::Id, + TypeToId<P5>::Id, + TypeToId<P6>::Id + }; + _setDefinition(args, ASMJIT_ARRAY_SIZE(args), TypeToId<RET>::Id); + } +}; + +//! @brief Class used to build function with 8 arguments. +template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7> +struct FunctionBuilder8 : public FunctionDefinition +{ + inline FunctionBuilder8() + { + static const uint32_t args[] = + { + TypeToId<P0>::Id, + TypeToId<P1>::Id, + TypeToId<P2>::Id, + TypeToId<P3>::Id, + TypeToId<P4>::Id, + TypeToId<P5>::Id, + TypeToId<P6>::Id, + TypeToId<P7>::Id + }; + _setDefinition(args, ASMJIT_ARRAY_SIZE(args), TypeToId<RET>::Id); + } +}; + +//! @brief Class used to build function with 9 arguments. +template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8> +struct FunctionBuilder9 : public FunctionDefinition +{ + inline FunctionBuilder9() + { + static const uint32_t args[] = + { + TypeToId<P0>::Id, + TypeToId<P1>::Id, + TypeToId<P2>::Id, + TypeToId<P3>::Id, + TypeToId<P4>::Id, + TypeToId<P5>::Id, + TypeToId<P6>::Id, + TypeToId<P7>::Id, + TypeToId<P8>::Id + }; + _setDefinition(args, ASMJIT_ARRAY_SIZE(args), TypeToId<RET>::Id); + } +}; + +//! @brief Class used to build function with 10 arguments. 
+template<typename RET, typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9>
+struct FunctionBuilder10 : public FunctionDefinition
+{
+  inline FunctionBuilder10()
+  {
+    // Type-id of each argument, in declaration order.
+    static const uint32_t args[] =
+    {
+      TypeToId<P0>::Id,
+      TypeToId<P1>::Id,
+      TypeToId<P2>::Id,
+      TypeToId<P3>::Id,
+      TypeToId<P4>::Id,
+      TypeToId<P5>::Id,
+      TypeToId<P6>::Id,
+      TypeToId<P7>::Id,
+      TypeToId<P8>::Id,
+      TypeToId<P9>::Id
+    };
+    // Register the argument type-ids and the return type-id.
+    _setDefinition(args, ASMJIT_ARRAY_SIZE(args), TypeToId<RET>::Id);
+  }
+};
+
+// ============================================================================
+// [AsmJit::Emittable]
+// ============================================================================
+
+//! @brief Emittable.
+//!
+//! Emittable is an object that can emit a single instruction or more
+//! instructions. To create your custom emittable it's needed to override the
+//! abstract virtual method @c emit().
+//!
+//! When you are finished serializing instructions to the @c Compiler and you
+//! call @c Compiler::make(), it will first call @c prepare() method for each
+//! emittable in list, then @c translate(), @c emit() and @c post() is the last.
+//! Prepare can be used to calculate something that can be only calculated when
+//! emitting instructions is finished (for example @c Function uses @c prepare()
+//! to relocate memory home for all memory/spilled variables). The @c emit() should
+//! be used to emit instruction or multiple instructions into @a Assembler stream,
+//! and the @c post() is here to allow emitting embedded data (after function
+//! declaration), etc.
+struct ASMJIT_API Emittable
+{
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create new emittable.
+  //!
+  //! Never create @c Emittable by @c new operator or on the stack, use
+  //! @c Compiler::newObject template to do that.
+  Emittable(Compiler* c, uint32_t type) ASMJIT_NOTHROW;
+
+  //! @brief Destroy emittable.
+  //!
+  //! @note Never destroy emittable using @c delete keyword, @c Compiler
+  //! manages all emittables in internal memory pool and it will destroy
+  //! all emittables after you destroy it.
+  virtual ~Emittable() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Emit and Helpers]
+  // --------------------------------------------------------------------------
+
+  //! @brief Step 1. Extract emittable variables, update statistics, ...
+  virtual void prepare(CompilerContext& cc) ASMJIT_NOTHROW;
+  //! @brief Step 2. Translate instruction, alloc variables, ...
+  virtual Emittable* translate(CompilerContext& cc) ASMJIT_NOTHROW;
+  //! @brief Step 3. Emit to @c Assembler.
+  virtual void emit(Assembler& a) ASMJIT_NOTHROW;
+  //! @brief Step 4. Last post step (verify, add data, etc).
+  virtual void post(Assembler& a) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Utilities]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get maximum size in bytes of this emittable (in binary).
+  virtual int getMaxSize() const ASMJIT_NOTHROW;
+
+  //! @brief Try to unuse the variable @a v.
+  //!
+  //! Returns @c true only if the variable will be unused by the instruction,
+  //! otherwise @c false is returned.
+  virtual bool _tryUnuseVar(VarData* v) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Compiler]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get associated compiler instance.
+  inline Compiler* getCompiler() const ASMJIT_NOTHROW { return _compiler; }
+
+  // --------------------------------------------------------------------------
+  // [Type / Offset]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get emittable type, see @c EMITTABLE_TYPE.
+  inline uint32_t getType() const ASMJIT_NOTHROW { return _type; }
+
+  //! @brief Get whether the emittable was translated.
+  inline uint8_t isTranslated() const ASMJIT_NOTHROW { return _translated; }
+
+  //! @brief Get emittable offset in the stream.
+  //!
+  //! Emittable offset is not byte offset, each emittable increments offset by 1
+  //! and this value is then used by register allocator. Emittable offset is
+  //! set by the register allocator in the compiler, don't use it in your code.
+  inline uint32_t getOffset() const ASMJIT_NOTHROW { return _offset; }
+
+  // --------------------------------------------------------------------------
+  // [Emittables List]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get previous emittable in list.
+  inline Emittable* getPrev() const ASMJIT_NOTHROW { return _prev; }
+  //! @brief Get next emittable in list.
+  inline Emittable* getNext() const ASMJIT_NOTHROW { return _next; }
+
+  // --------------------------------------------------------------------------
+  // [Comment]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get comment string.
+  inline const char* getComment() const ASMJIT_NOTHROW { return _comment; }
+
+  //! @brief Set comment string to @a str.
+  void setComment(const char* str) ASMJIT_NOTHROW;
+
+  //! @brief Format comment string using @a fmt string and variable argument list.
+  void setCommentF(const char* fmt, ...) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Protected]
+  // --------------------------------------------------------------------------
+
+protected:
+  //! @brief Mark emittable as translated and return next.
+  inline Emittable* translated() ASMJIT_NOTHROW
+  {
+    // Each emittable must be translated exactly once.
+    ASMJIT_ASSERT(_translated == false);
+
+    _translated = true;
+    return _next;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Compiler where this emittable is connected to.
+  Compiler* _compiler;
+
+  //! @brief Type of emittable, see @c EMITTABLE_TYPE.
+  uint8_t _type;
+  //! @brief Whether the emittable was translated, see @c translate().
+  uint8_t _translated;
+  //! @brief Reserved flags for future use.
+  uint8_t _reserved0;
+  //! @brief Reserved flags for future use.
+  uint8_t _reserved1;
+
+  //! @brief Emittable offset.
+  uint32_t _offset;
+
+  //! @brief Previous emittable.
+  Emittable* _prev;
+  //! @brief Next emittable.
+  Emittable* _next;
+
+  //! @brief Embedded comment string (also used by a @c Comment emittable).
+  const char* _comment;
+
+private:
+  friend struct CompilerCore;
+
+  ASMJIT_DISABLE_COPY(Emittable)
+};
+
+// ============================================================================
+// [AsmJit::EDummy]
+// ============================================================================
+
+//! @brief Dummy emittable, used as a mark.
+//!
+//! This emittable does nothing and it's only used by @ref Compiler to mark
+//! specific location in the code.
+struct ASMJIT_API EDummy : public Emittable
+{
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref EDummy instance.
+  EDummy(Compiler* c) ASMJIT_NOTHROW;
+  //! @brief Destroy the @ref EDummy instance.
+  virtual ~EDummy() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Utilities]
+  // --------------------------------------------------------------------------
+
+  virtual int getMaxSize() const ASMJIT_NOTHROW;
+
+private:
+  ASMJIT_DISABLE_COPY(EDummy)
+};
+
+// ============================================================================
+// [AsmJit::EFunctionEnd]
+// ============================================================================
+
+//! @brief End of function.
+//!
+//! This emittable does nothing and it's only used by @ref Compiler to mark
+//! specific location in the code. The @c EFunctionEnd is similar to @c EDummy,
+//! except that it overrides @c translate() to return @c NULL.
+struct ASMJIT_API EFunctionEnd : public EDummy
+{
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref EFunctionEnd instance.
+  EFunctionEnd(Compiler* c) ASMJIT_NOTHROW;
+  //! @brief Destroy the @ref EFunctionEnd instance.
+  virtual ~EFunctionEnd() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Emit and Helpers]
+  // --------------------------------------------------------------------------
+
+  virtual Emittable* translate(CompilerContext& cc) ASMJIT_NOTHROW;
+
+private:
+  ASMJIT_DISABLE_COPY(EFunctionEnd)
+};
+
+// ============================================================================
+// [AsmJit::EComment]
+// ============================================================================
+
+//! @brief Emittable used to emit comment into @c Assembler logger.
+//!
+//! Comments allow to comment your assembler stream for better debugging
+//! and visualization what's happening. Comments are ignored if logger is
+//! not set.
+//!
+//!
Comment string can't be modified after comment was created.
+struct ASMJIT_API EComment : public Emittable
+{
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref EComment instance.
+  EComment(Compiler* c, const char* comment) ASMJIT_NOTHROW;
+  //! @brief Destroy the @ref EComment instance.
+  virtual ~EComment() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+  virtual void emit(Assembler& a) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Utilities]
+  // --------------------------------------------------------------------------
+
+  virtual int getMaxSize() const ASMJIT_NOTHROW;
+
+private:
+  friend struct CompilerCore;
+
+  ASMJIT_DISABLE_COPY(EComment)
+};
+
+// ============================================================================
+// [AsmJit::EData]
+// ============================================================================
+
+//! @brief Emittable used to embed data into the @c Assembler stream.
+//!
+//! @note This class is always allocated by @c AsmJit::Compiler.
+struct ASMJIT_API EData : public Emittable
+{
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref EData instance.
+  EData(Compiler* c, const void* data, sysuint_t length) ASMJIT_NOTHROW;
+  //! @brief Destroy the @ref EData instance.
+  virtual ~EData() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+  virtual void emit(Assembler& a) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Utilities]
+  // --------------------------------------------------------------------------
+
+  virtual int getMaxSize() const ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Data]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get pointer to embedded data.
+  uint8_t* getData() const ASMJIT_NOTHROW { return (uint8_t*)_data; }
+
+  //! @brief Get length of embedded data.
+  sysuint_t getLength() const ASMJIT_NOTHROW { return _length; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+protected:
+  //! @brief Data length.
+  sysuint_t _length;
+  //! @brief Data buffer (that will be embedded to the assembler stream).
+  uint8_t _data[sizeof(void*)];
+
+private:
+  friend struct CompilerCore;
+
+  ASMJIT_DISABLE_COPY(EData)
+};
+
+// ============================================================================
+// [AsmJit::EAlign]
+// ============================================================================
+
+//! @brief Emittable used to align assembler code.
+struct ASMJIT_API EAlign : public Emittable
+{
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref EAlign instance.
+  EAlign(Compiler* c, uint32_t size = 0) ASMJIT_NOTHROW;
+  //! @brief Destroy the @ref EAlign instance.
+  virtual ~EAlign() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+  virtual void emit(Assembler& a) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Utilities]
+  // --------------------------------------------------------------------------
+
+  virtual int getMaxSize() const ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Align Size]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get align size in bytes.
+  inline uint32_t getSize() const ASMJIT_NOTHROW { return _size; }
+  //! @brief Set align size in bytes to @a size.
+  inline void setSize(uint32_t size) ASMJIT_NOTHROW { _size = size; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+protected:
+  //! @brief Align size.
+  uint32_t _size;
+
+private:
+  friend struct CompilerCore;
+
+  ASMJIT_DISABLE_COPY(EAlign)
+};
+
+// ============================================================================
+// [AsmJit::ETarget]
+// ============================================================================
+
+//! @brief Target - the bound label.
+struct ASMJIT_API ETarget : public Emittable
+{
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref ETarget instance.
+  ETarget(Compiler* c, const Label& target) ASMJIT_NOTHROW;
+  //! @brief Destroy the @ref ETarget instance.
+  virtual ~ETarget() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+  virtual void prepare(CompilerContext& cc) ASMJIT_NOTHROW;
+  virtual Emittable* translate(CompilerContext& cc) ASMJIT_NOTHROW;
+  virtual void emit(Assembler& a) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Utilities]
+  // --------------------------------------------------------------------------
+
+  virtual int getMaxSize() const ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Methods]
+  // --------------------------------------------------------------------------
+
+  //! @brief Return label bound to this target.
+  inline const Label& getLabel() const ASMJIT_NOTHROW { return _label; }
+
+  //! @brief Get first jmp instruction.
+  inline EJmp* getFrom() const ASMJIT_NOTHROW { return _from; }
+
+  //! @brief Get register allocator state for this target.
+  inline StateData* getState() const ASMJIT_NOTHROW { return _state; }
+
+  //! @brief Get number of jumps to this target.
+  inline uint32_t getJumpsCount() const ASMJIT_NOTHROW { return _jumpsCount; }
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+protected:
+  //! @brief Label.
+  Label _label;
+  //! @brief First jump instruction that points to this target (label).
+  EJmp* _from;
+  //! @brief State at this location.
+  StateData* _state;
+
+  //! @brief Count of jumps to this target (label).
+  uint32_t _jumpsCount;
+
+private:
+  friend struct CompilerContext;
+  friend struct CompilerCore;
+  friend struct EInstruction;
+  friend struct EJmp;
+
+  ASMJIT_DISABLE_COPY(ETarget)
+};
+
+} // AsmJit namespace
+
+// [Api-End]
+#include "ApiEnd.h"
+
+// ============================================================================
+// [Platform Specific]
+// ============================================================================
+
+// [X86 / X64]
+#if defined(ASMJIT_X86) || defined(ASMJIT_X64)
+#include "CompilerX86X64.h"
+#endif // ASMJIT_X86 || ASMJIT_X64
+
+// [Guard]
+#endif // _ASMJIT_COMPILER_H
diff --git a/lib/AsmJit/CompilerX86X64.cpp b/lib/AsmJit/CompilerX86X64.cpp
new file mode 100644
index 0000000..b3f9e28
--- /dev/null
+++ b/lib/AsmJit/CompilerX86X64.cpp
@@ -0,0 +1,7812 @@
+// AsmJit - Complete JIT Assembler for C++ Language.
+
+// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com>
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// We are using sprintf() here. +#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS) +#define _CRT_SECURE_NO_WARNINGS +#endif // _MSC_VER + +// [Dependencies] +#include "Assembler.h" +#include "CodeGenerator.h" +#include "Compiler.h" +#include "CpuInfo.h" +#include "Logger.h" +#include "Util_p.h" + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +// ============================================================================ +// [Helpers - Logging] +// ============================================================================ + +// Defined in AssemblerX86X64.cpp. +ASMJIT_HIDDEN char* dumpRegister(char* buf, uint32_t type, uint32_t index) ASMJIT_NOTHROW; +ASMJIT_HIDDEN char* dumpOperand(char* buf, const Operand* op) ASMJIT_NOTHROW; + +// ============================================================================ +// [Helpers - Variables] +// ============================================================================ + +struct VariableInfo +{ + enum CLASS_INFO + { + CLASS_NONE = 0x00, + CLASS_GP = 0x01, + CLASS_X87 = 0x02, + CLASS_MM = 0x04, + CLASS_XMM = 0x08, + }; + + enum FLAGS + { + FLAG_SP_FP = 0x10, + FLAG_DP_FP = 0x20, + FLAG_VECTOR = 0x40 + }; + + uint32_t code; + uint8_t size; + uint8_t clazz; + uint8_t flags; + uint8_t reserved_0; + char name[8]; +}; + +#define C(c) VariableInfo::CLASS_##c +#define F(f) VariableInfo::FLAG_##f +static const VariableInfo variableInfo[] = +{ + /* 0 */ { REG_TYPE_GPD , 4 , C(GP) , 0 , 0, "GP.D" }, + /* 1 */ { REG_TYPE_GPQ , 8 , C(GP) , 0 , 0, "GP.Q" }, + /* 2 */ { REG_TYPE_X87 , 4 , C(X87), F(SP_FP) , 0, "X87" }, + /* 3 */ { REG_TYPE_X87 , 4 , C(X87), F(SP_FP) , 0, 
"X87.1F" }, + /* 4 */ { REG_TYPE_X87 , 8 , C(X87), F(DP_FP) , 0, "X87.1D" }, + /* 5 */ { REG_TYPE_MM , 8 , C(MM) , F(VECTOR), 0, "MM" }, + /* 6 */ { REG_TYPE_XMM , 16, C(XMM), 0 , 0, "XMM" }, + /* 7 */ { REG_TYPE_XMM , 4 , C(XMM), F(SP_FP) , 0, "XMM.1F" }, + /* 8 */ { REG_TYPE_XMM , 8 , C(XMM), F(DP_FP) , 0, "XMM.1D" }, + /* 9 */ { REG_TYPE_XMM , 16, C(XMM), F(SP_FP) | F(VECTOR), 0, "XMM.4F" }, + /* 10 */ { REG_TYPE_XMM , 16, C(XMM), F(DP_FP) | F(VECTOR), 0, "XMM.2D" } +}; +#undef F +#undef C + +static uint32_t getVariableClass(uint32_t type) +{ + ASMJIT_ASSERT(type < ASMJIT_ARRAY_SIZE(variableInfo)); + return variableInfo[type].clazz; +} + +static uint32_t getVariableSize(uint32_t type) +{ + ASMJIT_ASSERT(type < ASMJIT_ARRAY_SIZE(variableInfo)); + return variableInfo[type].size; +} + +static uint32_t getVariableRegisterCode(uint32_t type, uint32_t index) +{ + ASMJIT_ASSERT(type < ASMJIT_ARRAY_SIZE(variableInfo)); + return variableInfo[type].code | index; +} + +static bool isVariableInteger(uint32_t type) +{ + ASMJIT_ASSERT(type < ASMJIT_ARRAY_SIZE(variableInfo)); + return (variableInfo[type].clazz & VariableInfo::CLASS_GP) != 0; +} + +static bool isVariableFloat(uint32_t type) +{ + ASMJIT_ASSERT(type < ASMJIT_ARRAY_SIZE(variableInfo)); + return (variableInfo[type].flags & (VariableInfo::FLAG_SP_FP | VariableInfo::FLAG_DP_FP)) != 0; +} + +static GPVar GPVarFromData(VarData* vdata) +{ + GPVar var; + var._var.id = vdata->id; + var._var.size = vdata->size; + var._var.registerCode = variableInfo[vdata->type].code; + var._var.variableType = vdata->type; + return var; +} + +static MMVar MMVarFromData(VarData* vdata) +{ + MMVar var; + var._var.id = vdata->id; + var._var.size = vdata->size; + var._var.registerCode = variableInfo[vdata->type].code; + var._var.variableType = vdata->type; + return var; +} + +static XMMVar XMMVarFromData(VarData* vdata) +{ + XMMVar var; + var._var.id = vdata->id; + var._var.size = vdata->size; + var._var.registerCode = 
variableInfo[vdata->type].code; + var._var.variableType = vdata->type; + return var; +} + +// ============================================================================ +// [Helpers - Emittables] +// ============================================================================ + +static void delAll(Emittable* first) ASMJIT_NOTHROW +{ + Emittable* cur = first; + + while (cur) + { + Emittable* next = cur->getNext(); + cur->~Emittable(); + cur = next; + } +} + +// ============================================================================ +// [Helpers - Compiler] +// ============================================================================ + +template<typename T> +inline T* Compiler_newObject(CompilerCore* self) ASMJIT_NOTHROW +{ + void* addr = self->getZone().zalloc(sizeof(T)); + return new(addr) T(reinterpret_cast<Compiler*>(self)); +} + +template<typename T, typename P1> +inline T* Compiler_newObject(CompilerCore* self, P1 p1) ASMJIT_NOTHROW +{ + void* addr = self->getZone().zalloc(sizeof(T)); + return new(addr) T(reinterpret_cast<Compiler*>(self), p1); +} + +template<typename T, typename P1, typename P2> +inline T* Compiler_newObject(CompilerCore* self, P1 p1, P2 p2) ASMJIT_NOTHROW +{ + void* addr = self->getZone().zalloc(sizeof(T)); + return new(addr) T(reinterpret_cast<Compiler*>(self), p1, p2); +} + +template<typename T, typename P1, typename P2, typename P3> +inline T* Compiler_newObject(CompilerCore* self, P1 p1, P2 p2, P3 p3) ASMJIT_NOTHROW +{ + void* addr = self->getZone().zalloc(sizeof(T)); + return new(addr) T(reinterpret_cast<Compiler*>(self), p1, p2, p3); +} + +template<typename T, typename P1, typename P2, typename P3, typename P4> +inline T* Compiler_newObject(CompilerCore* self, P1 p1, P2 p2, P3 p3, P4 p4) ASMJIT_NOTHROW +{ + void* addr = self->getZone().zalloc(sizeof(T)); + return new(addr) T(reinterpret_cast<Compiler*>(self), p1, p2, p3, p4); +} + +template<typename T, typename P1, typename P2, typename P3, typename P4, typename P5> +inline T* 
Compiler_newObject(CompilerCore* self, P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) ASMJIT_NOTHROW +{ + void* addr = self->getZone().zalloc(sizeof(T)); + return new(addr) T(reinterpret_cast<Compiler*>(self), p1, p2, p3, p4, p5); +} + +// ============================================================================ +// [AsmJit::FunctionPrototype] +// ============================================================================ + +FunctionPrototype::FunctionPrototype() ASMJIT_NOTHROW +{ + // Safe defaults. + _clear(); +} + +FunctionPrototype::~FunctionPrototype() ASMJIT_NOTHROW +{ +} + +void FunctionPrototype::setPrototype( + uint32_t callingConvention, + const uint32_t* arguments, + uint32_t argumentsCount, + uint32_t returnValue) ASMJIT_NOTHROW +{ + _setCallingConvention(callingConvention); + + if (argumentsCount > 32) argumentsCount = 32; + _setPrototype(arguments, argumentsCount, returnValue); +} + +uint32_t FunctionPrototype::findArgumentByRegisterCode(uint32_t regCode) const ASMJIT_NOTHROW +{ + uint32_t type = regCode & REG_TYPE_MASK; + uint32_t idx = regCode & REG_INDEX_MASK; + + uint32_t clazz; + uint32_t i; + + switch (type) + { + case REG_TYPE_GPD: + case REG_TYPE_GPQ: + clazz = VariableInfo::CLASS_GP; + break; + + case REG_TYPE_MM: + clazz = VariableInfo::CLASS_MM; + break; + + case REG_TYPE_XMM: + clazz = VariableInfo::CLASS_XMM; + break; + + default: + return INVALID_VALUE; + } + + for (i = 0; i < _argumentsCount; i++) + { + const Argument& arg = _arguments[i]; + if ((getVariableClass(arg.variableType) & clazz) != 0 && (arg.registerIndex == idx)) + return i; + } + + return INVALID_VALUE; +} + +void FunctionPrototype::_clear() ASMJIT_NOTHROW +{ + _callingConvention = CALL_CONV_NONE; + _calleePopsStack = false; + + _argumentsCount = 0; + _argumentsDirection = ARGUMENT_DIR_RIGHT_TO_LEFT; + _argumentsStackSize = 0; + + _returnValue = INVALID_VALUE; + + Util::memset32(_argumentsGPList , INVALID_VALUE, ASMJIT_ARRAY_SIZE(_argumentsGPList )); + 
Util::memset32(_argumentsXMMList, INVALID_VALUE, ASMJIT_ARRAY_SIZE(_argumentsXMMList)); + + _argumentsGP = 0; + _argumentsMM = 0; + _argumentsXMM = 0; + + _preservedGP = 0; + _preservedMM = 0; + _preservedXMM = 0; + + _passedGP = 0; + _passedMM = 0; + _passedXMM = 0; +} + +void FunctionPrototype::_setCallingConvention(uint32_t callingConvention) ASMJIT_NOTHROW +{ + // Safe defaults. + _clear(); + + _callingConvention = callingConvention; + + // -------------------------------------------------------------------------- + // [X86 Calling Conventions] + // -------------------------------------------------------------------------- + +#if defined(ASMJIT_X86) + _preservedGP = (1 << REG_INDEX_EBX) | + (1 << REG_INDEX_ESP) | + (1 << REG_INDEX_EBP) | + (1 << REG_INDEX_ESI) | + (1 << REG_INDEX_EDI) ; + _preservedXMM = 0; + + switch (_callingConvention) + { + case CALL_CONV_CDECL: + break; + + case CALL_CONV_STDCALL: + _calleePopsStack = true; + break; + + case CALL_CONV_MSTHISCALL: + _calleePopsStack = true; + _argumentsGPList[0] = REG_INDEX_ECX; + + _argumentsGP = (1 << REG_INDEX_ECX); + break; + + case CALL_CONV_MSFASTCALL: + _calleePopsStack = true; + _argumentsGPList[0] = REG_INDEX_ECX; + _argumentsGPList[1] = REG_INDEX_EDX; + + _argumentsGP = (1 << REG_INDEX_ECX) | + (1 << REG_INDEX_EDX) ; + break; + + case CALL_CONV_BORLANDFASTCALL: + _calleePopsStack = true; + _argumentsDirection = ARGUMENT_DIR_LEFT_TO_RIGHT; + _argumentsGPList[0] = REG_INDEX_EAX; + _argumentsGPList[1] = REG_INDEX_EDX; + _argumentsGPList[2] = REG_INDEX_ECX; + + _argumentsGP = (1 << REG_INDEX_EAX) | + (1 << REG_INDEX_EDX) | + (1 << REG_INDEX_ECX) ; + break; + + case CALL_CONV_GCCFASTCALL: + _calleePopsStack = true; + _argumentsGPList[0] = REG_INDEX_ECX; + _argumentsGPList[1] = REG_INDEX_EDX; + + _argumentsGP = (1 << REG_INDEX_ECX) | + (1 << REG_INDEX_EDX) ; + break; + + case CALL_CONV_GCCREGPARM_1: + _calleePopsStack = false; + _argumentsGPList[0] = REG_INDEX_EAX; + + _argumentsGP = (1 << 
REG_INDEX_EAX) ; + break; + + case CALL_CONV_GCCREGPARM_2: + _calleePopsStack = false; + _argumentsGPList[0] = REG_INDEX_EAX; + _argumentsGPList[1] = REG_INDEX_EDX; + + _argumentsGP = (1 << REG_INDEX_EAX) | + (1 << REG_INDEX_EDX) ; + break; + + case CALL_CONV_GCCREGPARM_3: + _calleePopsStack = false; + _argumentsGPList[0] = REG_INDEX_EAX; + _argumentsGPList[1] = REG_INDEX_EDX; + _argumentsGPList[2] = REG_INDEX_ECX; + + _argumentsGP = (1 << REG_INDEX_EAX) | + (1 << REG_INDEX_EDX) | + (1 << REG_INDEX_ECX) ; + break; + + default: + // Illegal calling convention. + ASMJIT_ASSERT(0); + } +#endif // ASMJIT_X86 + + // -------------------------------------------------------------------------- + // [X64 Calling Conventions] + // -------------------------------------------------------------------------- + +#if defined(ASMJIT_X64) + switch (_callingConvention) + { + case CALL_CONV_X64W: + _argumentsGPList[0] = REG_INDEX_RCX; + _argumentsGPList[1] = REG_INDEX_RDX; + _argumentsGPList[2] = REG_INDEX_R8; + _argumentsGPList[3] = REG_INDEX_R9; + + _argumentsXMMList[0] = REG_INDEX_XMM0; + _argumentsXMMList[1] = REG_INDEX_XMM1; + _argumentsXMMList[2] = REG_INDEX_XMM2; + _argumentsXMMList[3] = REG_INDEX_XMM3; + + _argumentsGP = (1 << REG_INDEX_RCX ) | + (1 << REG_INDEX_RDX ) | + (1 << REG_INDEX_R8 ) | + (1 << REG_INDEX_R9 ) ; + + _argumentsXMM = (1 << REG_INDEX_XMM0 ) | + (1 << REG_INDEX_XMM1 ) | + (1 << REG_INDEX_XMM2 ) | + (1 << REG_INDEX_XMM3 ) ; + + _preservedGP = (1 << REG_INDEX_RBX ) | + (1 << REG_INDEX_RSP ) | + (1 << REG_INDEX_RBP ) | + (1 << REG_INDEX_RSI ) | + (1 << REG_INDEX_RDI ) | + (1 << REG_INDEX_R12 ) | + (1 << REG_INDEX_R13 ) | + (1 << REG_INDEX_R14 ) | + (1 << REG_INDEX_R15 ) ; + + _preservedXMM = (1 << REG_INDEX_XMM6 ) | + (1 << REG_INDEX_XMM7 ) | + (1 << REG_INDEX_XMM8 ) | + (1 << REG_INDEX_XMM9 ) | + (1 << REG_INDEX_XMM10) | + (1 << REG_INDEX_XMM11) | + (1 << REG_INDEX_XMM12) | + (1 << REG_INDEX_XMM13) | + (1 << REG_INDEX_XMM14) | + (1 << REG_INDEX_XMM15) ; + 
break; + + case CALL_CONV_X64U: + _argumentsGPList[0] = REG_INDEX_RDI; + _argumentsGPList[1] = REG_INDEX_RSI; + _argumentsGPList[2] = REG_INDEX_RDX; + _argumentsGPList[3] = REG_INDEX_RCX; + _argumentsGPList[4] = REG_INDEX_R8; + _argumentsGPList[5] = REG_INDEX_R9; + + _argumentsXMMList[0] = REG_INDEX_XMM0; + _argumentsXMMList[1] = REG_INDEX_XMM1; + _argumentsXMMList[2] = REG_INDEX_XMM2; + _argumentsXMMList[3] = REG_INDEX_XMM3; + _argumentsXMMList[4] = REG_INDEX_XMM4; + _argumentsXMMList[5] = REG_INDEX_XMM5; + _argumentsXMMList[6] = REG_INDEX_XMM6; + _argumentsXMMList[7] = REG_INDEX_XMM7; + + _argumentsGP = (1 << REG_INDEX_RDI ) | + (1 << REG_INDEX_RSI ) | + (1 << REG_INDEX_RDX ) | + (1 << REG_INDEX_RCX ) | + (1 << REG_INDEX_R8 ) | + (1 << REG_INDEX_R9 ) ; + + _argumentsXMM = (1 << REG_INDEX_XMM0 ) | + (1 << REG_INDEX_XMM1 ) | + (1 << REG_INDEX_XMM2 ) | + (1 << REG_INDEX_XMM3 ) | + (1 << REG_INDEX_XMM4 ) | + (1 << REG_INDEX_XMM5 ) | + (1 << REG_INDEX_XMM6 ) | + (1 << REG_INDEX_XMM7 ) ; + + _preservedGP = (1 << REG_INDEX_RBX ) | + (1 << REG_INDEX_RSP ) | + (1 << REG_INDEX_RBP ) | + (1 << REG_INDEX_R12 ) | + (1 << REG_INDEX_R13 ) | + (1 << REG_INDEX_R14 ) | + (1 << REG_INDEX_R15 ) ; + break; + + default: + // Illegal calling convention. 
+ ASMJIT_ASSERT(0); + } +#endif // ASMJIT_X64 +} + +void FunctionPrototype::_setPrototype( + const uint32_t* argumentsData, + uint32_t argumentsCount, + uint32_t returnValue) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(argumentsCount <= 32); + + int32_t i; + + int32_t posGP = 0; + int32_t posXMM = 0; + int32_t stackOffset = 0; + + _returnValue = returnValue; + + for (i = 0; i < (sysint_t)argumentsCount; i++) + { + Argument& a = _arguments[i]; + a.variableType = argumentsData[i]; + a.registerIndex = INVALID_VALUE; + a.stackOffset = INVALID_VALUE; + } + + _argumentsCount = (uint32_t)argumentsCount; + if (_argumentsCount == 0) return; + + // -------------------------------------------------------------------------- + // [X86 Calling Conventions (32-bit)] + // -------------------------------------------------------------------------- + +#if defined(ASMJIT_X86) + // Register arguments (Integer), always left-to-right. + for (i = 0; i != argumentsCount; i++) + { + Argument& a = _arguments[i]; + if (isVariableInteger(a.variableType) && posGP < 16 && _argumentsGPList[posGP] != INVALID_VALUE) + { + a.registerIndex = _argumentsGPList[posGP++]; + _passedGP |= Util::maskFromIndex(a.registerIndex); + } + } + + // Stack arguments. + bool ltr = _argumentsDirection == ARGUMENT_DIR_LEFT_TO_RIGHT; + sysint_t istart = ltr ? 0 : (sysint_t)argumentsCount - 1; + sysint_t iend = ltr ? (sysint_t)argumentsCount : -1; + sysint_t istep = ltr ? 
1 : -1; + + for (i = istart; i != iend; i += istep) + { + Argument& a = _arguments[i]; + if (a.registerIndex != INVALID_VALUE) continue; + + if (isVariableInteger(a.variableType)) + { + stackOffset -= 4; + a.stackOffset = stackOffset; + } + else if (isVariableFloat(a.variableType)) + { + int32_t size = (int32_t)variableInfo[a.variableType].size; + stackOffset -= size; + a.stackOffset = stackOffset; + } + } +#endif // ASMJIT_X86 + + // -------------------------------------------------------------------------- + // [X64 Calling Conventions (64-bit)] + // -------------------------------------------------------------------------- + +#if defined(ASMJIT_X64) + // Windows 64-bit specific. + if (_callingConvention == CALL_CONV_X64W) + { + sysint_t max = argumentsCount < 4 ? argumentsCount : 4; + + // Register arguments (Integer / FP), always left to right. + for (i = 0; i != max; i++) + { + Argument& a = _arguments[i]; + + if (isVariableInteger(a.variableType)) + { + a.registerIndex = _argumentsGPList[i]; + _passedGP |= Util::maskFromIndex(a.registerIndex); + } + else if (isVariableFloat(a.variableType)) + { + a.registerIndex = _argumentsXMMList[i]; + _passedXMM |= Util::maskFromIndex(a.registerIndex); + } + } + + // Stack arguments (always right-to-left). + for (i = argumentsCount - 1; i != -1; i--) + { + Argument& a = _arguments[i]; + if (a.isAssigned()) continue; + + if (isVariableInteger(a.variableType)) + { + stackOffset -= 8; // Always 8 bytes. + a.stackOffset = stackOffset; + } + else if (isVariableFloat(a.variableType)) + { + int32_t size = (int32_t)variableInfo[a.variableType].size; + stackOffset -= size; + a.stackOffset = stackOffset; + } + } + + // 32 bytes shadow space (X64W calling convention specific). + stackOffset -= 4 * 8; + } + // Linux/Unix 64-bit (AMD64 calling convention). + else + { + // Register arguments (Integer), always left to right. 
+ for (i = 0; i != argumentsCount; i++) + { + Argument& a = _arguments[i]; + if (isVariableInteger(a.variableType) && posGP < 32 && _argumentsGPList[posGP] != INVALID_VALUE) + { + a.registerIndex = _argumentsGPList[posGP++]; + _passedGP |= Util::maskFromIndex(a.registerIndex); + } + } + + // Register arguments (FP), always left to right. + for (i = 0; i != argumentsCount; i++) + { + Argument& a = _arguments[i]; + if (isVariableFloat(a.variableType)) + { + a.registerIndex = _argumentsXMMList[posXMM++]; + _passedXMM |= Util::maskFromIndex(a.registerIndex); + } + } + + // Stack arguments. + for (i = argumentsCount - 1; i != -1; i--) + { + Argument& a = _arguments[i]; + if (a.isAssigned()) continue; + + if (isVariableInteger(a.variableType)) + { + stackOffset -= 8; + a.stackOffset = stackOffset; + } + else if (isVariableFloat(a.variableType)) + { + int32_t size = (int32_t)variableInfo[a.variableType].size; + + stackOffset -= size; + a.stackOffset = stackOffset; + } + } + } +#endif // ASMJIT_X64 + + // Modify stack offset (all function parameters will be in positive stack + // offset that is never zero). + for (i = 0; i < (sysint_t)argumentsCount; i++) + { + if (_arguments[i].registerIndex == INVALID_VALUE) + _arguments[i].stackOffset += sizeof(sysint_t) - stackOffset; + } + + _argumentsStackSize = (uint32_t)(-stackOffset); +} + +void FunctionPrototype::_setReturnValue(uint32_t valueId) ASMJIT_NOTHROW +{ + // TODO. 
+} + +// ============================================================================ +// [AsmJit::EVariableHint] +// ============================================================================ + +EVariableHint::EVariableHint(Compiler* c, VarData* vdata, uint32_t hintId, uint32_t hintValue) ASMJIT_NOTHROW : + Emittable(c, EMITTABLE_VARIABLE_HINT), + _vdata(vdata), + _hintId(hintId), + _hintValue(hintValue) +{ + ASMJIT_ASSERT(_vdata != NULL); +} + +EVariableHint::~EVariableHint() ASMJIT_NOTHROW +{ +} + +void EVariableHint::prepare(CompilerContext& cc) ASMJIT_NOTHROW +{ + _offset = cc._currentOffset; + + // First emittable (begin of variable scope). + if (_vdata->firstEmittable == NULL) _vdata->firstEmittable = this; + + Emittable* oldLast = _vdata->lastEmittable; + + // Last emittable (end of variable scope). + _vdata->lastEmittable = this; + + switch (_hintId) + { + case VARIABLE_HINT_ALLOC: + case VARIABLE_HINT_SPILL: + case VARIABLE_HINT_SAVE: + if (!cc._isActive(_vdata)) cc._addActive(_vdata); + break; + case VARIABLE_HINT_SAVE_AND_UNUSE: + if (!cc._isActive(_vdata)) cc._addActive(_vdata); + break; + case VARIABLE_HINT_UNUSE: + if (oldLast) oldLast->_tryUnuseVar(_vdata); + break; + } +} + +Emittable* EVariableHint::translate(CompilerContext& cc) ASMJIT_NOTHROW +{ + switch (_hintId) + { + case VARIABLE_HINT_ALLOC: + cc.allocVar(_vdata, _hintValue, VARIABLE_ALLOC_READWRITE); + break; + case VARIABLE_HINT_SPILL: + if (_vdata->state == VARIABLE_STATE_REGISTER) + cc.spillVar(_vdata); + break; + case VARIABLE_HINT_SAVE: + case VARIABLE_HINT_SAVE_AND_UNUSE: + if (_vdata->state == VARIABLE_STATE_REGISTER && _vdata->changed) + { + cc.emitSaveVar(_vdata, _vdata->registerIndex); + _vdata->changed = false; + } + if (_hintId == VARIABLE_HINT_SAVE_AND_UNUSE) goto unuse; + break; + case VARIABLE_HINT_UNUSE: +unuse: + cc.unuseVar(_vdata, VARIABLE_STATE_UNUSED); + goto end; + } + + cc._unuseVarOnEndOfScope(this, _vdata); + +end: + return translated(); +} + +int 
EVariableHint::getMaxSize() const ASMJIT_NOTHROW +{ + // Variable hint is NOP, but it can generate other emittables which can do + // something. + return 0; +} + +// ============================================================================ +// [AsmJit::EInstruction] +// ============================================================================ + +EInstruction::EInstruction(Compiler* c, uint32_t code, Operand* operandsData, uint32_t operandsCount) ASMJIT_NOTHROW : + Emittable(c, EMITTABLE_INSTRUCTION) +{ + _code = code; + _emitOptions = c->_emitOptions; + // Each created instruction takes emit options and clears it. + c->_emitOptions = 0; + + _operands = operandsData; + _operandsCount = operandsCount; + _memOp = NULL; + + _variables = NULL; + _variablesCount = 0; + + uint32_t i; + for (i = 0; i < operandsCount; i++) + { + if (_operands[i].isMem()) + { + _memOp = reinterpret_cast<Mem*>(&_operands[i]); + break; + } + } + + const InstructionDescription* id = &instructionDescription[_code]; + _isSpecial = id->isSpecial(); + _isFPU = id->isFPU(); + _isGPBLoUsed = false; + _isGPBHiUsed = false; + + if (_isSpecial) + { + // ${SPECIAL_INSTRUCTION_HANDLING_BEGIN} + switch (_code) + { + case INST_CPUID: + // Special... + break; + + case INST_CBW: + case INST_CDQE: + case INST_CWDE: + // Special... + break; + + case INST_CMPXCHG: + case INST_CMPXCHG8B: +#if defined(ASMJIT_X64) + case INST_CMPXCHG16B: +#endif // ASMJIT_X64 + // Special... + break; + +#if defined(ASMJIT_X86) + case INST_DAA: + case INST_DAS: + // Special... + break; +#endif // ASMJIT_X86 + + case INST_IMUL: + switch (operandsCount) + { + case 2: + // IMUL dst, src is not special instruction. + _isSpecial = false; + break; + case 3: + if (!(_operands[0].isVar() && _operands[1].isVar() && _operands[2].isVarMem())) + { + // Only IMUL dst_lo, dst_hi, reg/mem is special, all others not. + _isSpecial = false; + } + break; + } + break; + case INST_MUL: + case INST_IDIV: + case INST_DIV: + // Special... 
+ break; + + case INST_MOV_PTR: + // Special... + break; + + case INST_LAHF: + case INST_SAHF: + // Special... + break; + + case INST_MASKMOVQ: + case INST_MASKMOVDQU: + // Special... + break; + + case INST_ENTER: + case INST_LEAVE: + // Special... + break; + + case INST_RET: + // Special... + break; + + case INST_MONITOR: + case INST_MWAIT: + // Special... + break; + + case INST_POP: + case INST_POPAD: + case INST_POPFD: + case INST_POPFQ: + // Special... + break; + + case INST_PUSH: + case INST_PUSHAD: + case INST_PUSHFD: + case INST_PUSHFQ: + // Special... + break; + + case INST_RCL: + case INST_RCR: + case INST_ROL: + case INST_ROR: + case INST_SAL: + case INST_SAR: + case INST_SHL: + case INST_SHR: + // Rot instruction is special only if last operand is variable (register). + _isSpecial = _operands[1].isVar(); + break; + + case INST_SHLD: + case INST_SHRD: + // Shld/Shrd instruction is special only if last operand is variable (register). + _isSpecial = _operands[2].isVar(); + break; + + case INST_RDTSC: + case INST_RDTSCP: + // Special... + break; + + case INST_REP_LODSB: + case INST_REP_LODSD: + case INST_REP_LODSQ: + case INST_REP_LODSW: + case INST_REP_MOVSB: + case INST_REP_MOVSD: + case INST_REP_MOVSQ: + case INST_REP_MOVSW: + case INST_REP_STOSB: + case INST_REP_STOSD: + case INST_REP_STOSQ: + case INST_REP_STOSW: + case INST_REPE_CMPSB: + case INST_REPE_CMPSD: + case INST_REPE_CMPSQ: + case INST_REPE_CMPSW: + case INST_REPE_SCASB: + case INST_REPE_SCASD: + case INST_REPE_SCASQ: + case INST_REPE_SCASW: + case INST_REPNE_CMPSB: + case INST_REPNE_CMPSD: + case INST_REPNE_CMPSQ: + case INST_REPNE_CMPSW: + case INST_REPNE_SCASB: + case INST_REPNE_SCASD: + case INST_REPNE_SCASQ: + case INST_REPNE_SCASW: + // Special... 
+ break; + + default: + ASMJIT_ASSERT(0); + } + // ${SPECIAL_INSTRUCTION_HANDLING_END} + } +} + +EInstruction::~EInstruction() ASMJIT_NOTHROW +{ +} + +void EInstruction::prepare(CompilerContext& cc) ASMJIT_NOTHROW +{ +#define __GET_VARIABLE(__vardata__) \ + { \ + VarData* _candidate = __vardata__; \ + \ + for (var = cur; ;) \ + { \ + if (var == _variables) \ + { \ + var = cur++; \ + var->vdata = _candidate; \ + var->vflags = 0; \ + var->regMask = 0xFFFFFFFF; \ + break; \ + } \ + \ + var--; \ + \ + if (var->vdata == _candidate) \ + { \ + break; \ + } \ + } \ + \ + ASMJIT_ASSERT(var != NULL); \ + } + + _offset = cc._currentOffset; + + const InstructionDescription* id = &instructionDescription[_code]; + + uint32_t i, len = _operandsCount; + uint32_t variablesCount = 0; + + for (i = 0; i < len; i++) + { + Operand& o = _operands[i]; + + if (o.isVar()) + { + ASMJIT_ASSERT(o.getId() != INVALID_VALUE); + VarData* vdata = _compiler->_getVarData(o.getId()); + ASMJIT_ASSERT(vdata != NULL); + + if (reinterpret_cast<BaseVar*>(&o)->isGPVar()) + { + if (reinterpret_cast<GPVar*>(&o)->isGPBLo()) { _isGPBLoUsed = true; vdata->registerGPBLoCount++; }; + if (reinterpret_cast<GPVar*>(&o)->isGPBHi()) { _isGPBHiUsed = true; vdata->registerGPBHiCount++; }; + } + + if (vdata->workOffset != _offset) + { + if (!cc._isActive(vdata)) cc._addActive(vdata); + + vdata->workOffset = _offset; + variablesCount++; + } + } + else if (o.isMem()) + { + if ((o.getId() & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(o.getId()); + ASMJIT_ASSERT(vdata != NULL); + + cc._markMemoryUsed(vdata); + + if (vdata->workOffset != _offset) + { + if (!cc._isActive(vdata)) cc._addActive(vdata); + + vdata->workOffset = _offset; + variablesCount++; + } + } + else if ((o._mem.base & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(o._mem.base); + ASMJIT_ASSERT(vdata != NULL); + + if (vdata->workOffset != _offset) + { + if 
(!cc._isActive(vdata)) cc._addActive(vdata); + + vdata->workOffset = _offset; + variablesCount++; + } + } + + if ((o._mem.index & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(o._mem.index); + ASMJIT_ASSERT(vdata != NULL); + + if (vdata->workOffset != _offset) + { + if (!cc._isActive(vdata)) cc._addActive(vdata); + + vdata->workOffset = _offset; + variablesCount++; + } + } + } + } + + if (!variablesCount) + { + cc._currentOffset++; + return; + } + + _variables = reinterpret_cast<VarAllocRecord*>(_compiler->getZone().zalloc(sizeof(VarAllocRecord) * variablesCount)); + if (!_variables) + { + _compiler->setError(ERROR_NO_HEAP_MEMORY); + cc._currentOffset++; + return; + } + + _variablesCount = variablesCount; + + VarAllocRecord* cur = _variables; + VarAllocRecord* var = NULL; + + bool _isGPBUsed = _isGPBLoUsed | _isGPBHiUsed; + uint32_t gpRestrictMask = Util::maskUpToIndex(REG_NUM_GP); + +#if defined(ASMJIT_X64) + if (_isGPBHiUsed) + { + gpRestrictMask &= Util::maskFromIndex(REG_INDEX_EAX) | + Util::maskFromIndex(REG_INDEX_EBX) | + Util::maskFromIndex(REG_INDEX_ECX) | + Util::maskFromIndex(REG_INDEX_EDX) | + Util::maskFromIndex(REG_INDEX_EBP) | + Util::maskFromIndex(REG_INDEX_ESI) | + Util::maskFromIndex(REG_INDEX_EDI) ; + } +#endif // ASMJIT_X64 + + for (i = 0; i < len; i++) + { + Operand& o = _operands[i]; + + if (o.isVar()) + { + VarData* vdata = _compiler->_getVarData(o.getId()); + ASMJIT_ASSERT(vdata != NULL); + + __GET_VARIABLE(vdata) + var->vflags |= VARIABLE_ALLOC_REGISTER; + + if (_isGPBUsed) + { +#if defined(ASMJIT_X86) + if (reinterpret_cast<GPVar*>(&o)->isGPB()) + { + var->regMask &= Util::maskFromIndex(REG_INDEX_EAX) | + Util::maskFromIndex(REG_INDEX_EBX) | + Util::maskFromIndex(REG_INDEX_ECX) | + Util::maskFromIndex(REG_INDEX_EDX) ; + } +#else + // Restrict all BYTE registers to RAX/RBX/RCX/RDX if HI BYTE register + // is used (REX prefix makes HI BYTE addressing unencodable). 
+ if (_isGPBHiUsed) + { + if (reinterpret_cast<GPVar*>(&o)->isGPB()) + { + var->regMask &= Util::maskFromIndex(REG_INDEX_EAX) | + Util::maskFromIndex(REG_INDEX_EBX) | + Util::maskFromIndex(REG_INDEX_ECX) | + Util::maskFromIndex(REG_INDEX_EDX) ; + } + } +#endif // ASMJIT_X86/X64 + } + + if (isSpecial()) + { + // ${SPECIAL_INSTRUCTION_HANDLING_BEGIN} + switch (_code) + { + case INST_CPUID: + switch (i) + { + case 0: + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; + case 1: + vdata->registerWriteCount++; + var->vflags |= VARIABLE_ALLOC_WRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EBX); + gpRestrictMask &= ~var->regMask; + break; + case 2: + vdata->registerWriteCount++; + var->vflags |= VARIABLE_ALLOC_WRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_ECX); + gpRestrictMask &= ~var->regMask; + break; + case 3: + vdata->registerWriteCount++; + var->vflags |= VARIABLE_ALLOC_WRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EDX); + gpRestrictMask &= ~var->regMask; + break; + + default: + ASMJIT_ASSERT(0); + } + break; + + case INST_CBW: + case INST_CDQE: + case INST_CWDE: + switch (i) + { + case 0: + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; + + default: + ASMJIT_ASSERT(0); + } + break; + + case INST_CMPXCHG: + switch (i) + { + case 0: + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; + case 1: + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE; + break; + case 2: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ; + 
break; + + default: + ASMJIT_ASSERT(0); + } + break; + + case INST_CMPXCHG8B: +#if defined(ASMJIT_X64) + case INST_CMPXCHG16B: +#endif // ASMJIT_X64 + switch (i) + { + case 0: + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EDX); + gpRestrictMask &= ~var->regMask; + break; + case 1: + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; + case 2: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_ECX); + gpRestrictMask &= ~var->regMask; + break; + case 3: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EBX); + gpRestrictMask &= ~var->regMask; + break; + + default: + ASMJIT_ASSERT(0); + } + break; + +#if defined(ASMJIT_X86) + case INST_DAA: + case INST_DAS: + ASMJIT_ASSERT(i == 0); + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; +#endif // ASMJIT_X86 + + case INST_IMUL: + case INST_MUL: + case INST_IDIV: + case INST_DIV: + switch (i) + { + case 0: + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; + case 1: + vdata->registerWriteCount++; + var->vflags |= VARIABLE_ALLOC_WRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EDX); + gpRestrictMask &= ~var->regMask; + break; + case 2: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ; + break; + + default: + ASMJIT_ASSERT(0); + } + break; + + case INST_MOV_PTR: + switch (i) + { + case 0: + 
vdata->registerWriteCount++; + var->vflags |= VARIABLE_ALLOC_WRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; + case 1: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; + default: + ASMJIT_ASSERT(0); + } + break; + + case INST_LAHF: + ASMJIT_ASSERT(i == 0); + vdata->registerWriteCount++; + var->vflags |= VARIABLE_ALLOC_WRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; + + case INST_SAHF: + ASMJIT_ASSERT(i == 0); + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; + + case INST_MASKMOVQ: + case INST_MASKMOVDQU: + switch (i) + { + case 0: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EDI); + gpRestrictMask &= ~var->regMask; + break; + case 1: + case 2: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ; + break; + } + break; + + case INST_ENTER: + case INST_LEAVE: + // TODO: SPECIAL INSTRUCTION. + break; + + case INST_RET: + // TODO: SPECIAL INSTRUCTION. + break; + + case INST_MONITOR: + case INST_MWAIT: + // TODO: MONITOR/MWAIT (COMPILER). + break; + + case INST_POP: + // TODO: SPECIAL INSTRUCTION. + break; + + case INST_POPAD: + case INST_POPFD: + case INST_POPFQ: + // TODO: SPECIAL INSTRUCTION. + break; + + case INST_PUSH: + // TODO: SPECIAL INSTRUCTION. + break; + + case INST_PUSHAD: + case INST_PUSHFD: + case INST_PUSHFQ: + // TODO: SPECIAL INSTRUCTION. 
+ break; + + case INST_RCL: + case INST_RCR: + case INST_ROL: + case INST_ROR: + case INST_SAL: + case INST_SAR: + case INST_SHL: + case INST_SHR: + switch (i) + { + case 0: + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE; + break; + case 1: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_ECX); + gpRestrictMask &= ~var->regMask; + break; + + default: + ASMJIT_ASSERT(0); + } + break; + + case INST_SHLD: + case INST_SHRD: + switch (i) + { + case 0: + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE; + break; + case 1: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ; + break; + case 2: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_ECX); + gpRestrictMask &= ~var->regMask; + break; + + default: + ASMJIT_ASSERT(0); + } + break; + + case INST_RDTSC: + case INST_RDTSCP: + switch (i) + { + case 0: + vdata->registerWriteCount++; + var->vflags |= VARIABLE_ALLOC_WRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EDX); + gpRestrictMask &= ~var->regMask; + break; + case 1: + vdata->registerWriteCount++; + var->vflags |= VARIABLE_ALLOC_WRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; + case 2: + ASMJIT_ASSERT(_code == INST_RDTSCP); + vdata->registerWriteCount++; + var->vflags |= VARIABLE_ALLOC_WRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_ECX); + gpRestrictMask &= ~var->regMask; + break; + + default: + ASMJIT_ASSERT(0); + } + break; + + case INST_REP_LODSB: + case INST_REP_LODSD: + case INST_REP_LODSQ: + case INST_REP_LODSW: + switch (i) + { + case 0: + vdata->registerWriteCount++; + var->vflags |= VARIABLE_ALLOC_WRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + 
gpRestrictMask &= ~var->regMask; + break; + case 1: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_ESI); + gpRestrictMask &= ~var->regMask; + break; + case 2: + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_ECX); + gpRestrictMask &= ~var->regMask; + break; + default: + ASMJIT_ASSERT(0); + } + break; + + case INST_REP_MOVSB: + case INST_REP_MOVSD: + case INST_REP_MOVSQ: + case INST_REP_MOVSW: + case INST_REPE_CMPSB: + case INST_REPE_CMPSD: + case INST_REPE_CMPSQ: + case INST_REPE_CMPSW: + case INST_REPNE_CMPSB: + case INST_REPNE_CMPSD: + case INST_REPNE_CMPSQ: + case INST_REPNE_CMPSW: + switch (i) + { + case 0: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EDI); + gpRestrictMask &= ~var->regMask; + break; + case 1: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_ESI); + gpRestrictMask &= ~var->regMask; + break; + case 2: + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_ECX); + gpRestrictMask &= ~var->regMask; + break; + default: + ASMJIT_ASSERT(0); + } + break; + + case INST_REP_STOSB: + case INST_REP_STOSD: + case INST_REP_STOSQ: + case INST_REP_STOSW: + switch (i) + { + case 0: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EDI); + gpRestrictMask &= ~var->regMask; + break; + case 1: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; + case 2: + vdata->registerRWCount++; + 
var->vflags |= VARIABLE_ALLOC_READWRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_ECX); + gpRestrictMask &= ~var->regMask; + break; + default: + ASMJIT_ASSERT(0); + } + break; + + case INST_REPE_SCASB: + case INST_REPE_SCASD: + case INST_REPE_SCASQ: + case INST_REPE_SCASW: + case INST_REPNE_SCASB: + case INST_REPNE_SCASD: + case INST_REPNE_SCASQ: + case INST_REPNE_SCASW: + switch (i) + { + case 0: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EDI); + gpRestrictMask &= ~var->regMask; + break; + case 1: + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_EAX); + gpRestrictMask &= ~var->regMask; + break; + case 2: + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE | VARIABLE_ALLOC_SPECIAL; + var->regMask = Util::maskFromIndex(REG_INDEX_ECX); + gpRestrictMask &= ~var->regMask; + break; + default: + ASMJIT_ASSERT(0); + } + break; + + default: + ASMJIT_ASSERT(0); + } + // ${SPECIAL_INSTRUCTION_HANDLING_END} + } + else + { + if (i == 0) + { + // CMP/TEST instruction. + if (id->code == INST_CMP || id->code == INST_TEST) + { + // Read-only case. + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ; + } + // MOV/MOVSS/MOVSD instructions. + // + // If instruction is MOV (source replaces the destination) or + // MOVSS/MOVSD and source operand is memory location then register + // allocator should know that previous destination value is lost + // (write only operation). + else if ((id->isMov()) || + ((id->code == INST_MOVSS || id->code == INST_MOVSD) /* && _operands[1].isMem() */) || + (id->code == INST_IMUL && _operandsCount == 3 && !isSpecial())) + { + // Write-only case. + vdata->registerWriteCount++; + var->vflags |= VARIABLE_ALLOC_WRITE; + } + else if (id->code == INST_LEA) + { + // Write. 
+ vdata->registerWriteCount++; + var->vflags |= VARIABLE_ALLOC_WRITE; + } + else + { + // Read/Write. + vdata->registerRWCount++; + var->vflags |= VARIABLE_ALLOC_READWRITE; + } + } + else + { + // Second, third, ... operands are read-only. + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_READ; + } + + if (!_memOp && i < 2 && (id->oflags[i] & InstructionDescription::O_MEM) != 0) + { + var->vflags |= VARIABLE_ALLOC_MEMORY; + } + } + + // If variable must be in specific register we could add some hint to allocator. + if (var->vflags & VARIABLE_ALLOC_SPECIAL) + { + vdata->prefRegisterMask |= Util::maskFromIndex(var->regMask); + cc._newRegisterHomeIndex(vdata, Util::findFirstBit(var->regMask)); + } + } + else if (o.isMem()) + { + if ((o.getId() & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(o.getId()); + ASMJIT_ASSERT(vdata != NULL); + + __GET_VARIABLE(vdata) + + if (i == 0) + { + // If variable is MOV instruction type (source replaces the destination) + // or variable is MOVSS/MOVSD instruction then register allocator should + // know that previous destination value is lost (write only operation). + if (id->isMov() || ((id->code == INST_MOVSS || id->code == INST_MOVSD))) + { + // Write only case. 
+ vdata->memoryWriteCount++; + } + else + { + vdata->memoryRWCount++; + } + } + else + { + vdata->memoryReadCount++; + } + } + else if ((o._mem.base & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(reinterpret_cast<Mem&>(o).getBase()); + ASMJIT_ASSERT(vdata != NULL); + + __GET_VARIABLE(vdata) + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_REGISTER | VARIABLE_ALLOC_READ; + var->regMask &= gpRestrictMask; + } + + if ((o._mem.index & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(reinterpret_cast<Mem&>(o).getIndex()); + ASMJIT_ASSERT(vdata != NULL); + + __GET_VARIABLE(vdata) + vdata->registerReadCount++; + var->vflags |= VARIABLE_ALLOC_REGISTER | VARIABLE_ALLOC_READ; + var->regMask &= gpRestrictMask; + } + } + } + + // Traverse all variables and update firstEmittable / lastEmittable. This + // function is called from iterator that scans emittables using forward + // direction so we can use this knowledge to optimize the process. + // + // Similar to ECall::prepare(). + for (i = 0; i < _variablesCount; i++) + { + VarData* v = _variables[i].vdata; + + // Update GP register allocator restrictions. + if (isVariableInteger(v->type)) + { + if (_variables[i].regMask == 0xFFFFFFFF) _variables[i].regMask &= gpRestrictMask; + } + + // Update first/last emittable (begin of variable scope). + if (v->firstEmittable == NULL) v->firstEmittable = this; + v->lastEmittable = this; + } + + // There are some instructions that can be used to clear register or to set + // register to some value (ideal case is all zeros or all ones). + // + // xor/pxor reg, reg ; Set all bits in reg to 0. + // sub/psub reg, reg ; Set all bits in reg to 0. + // andn reg, reg ; Set all bits in reg to 0. + // pcmpgt reg, reg ; Set all bits in reg to 0. + // pcmpeq reg, reg ; Set all bits in reg to 1. 
+ + if (_variablesCount == 1 && + _operandsCount > 1 && + _operands[0].isVar() && + _operands[1].isVar() && + !_memOp) + { + switch (_code) + { + // XOR Instructions. + case INST_XOR: + case INST_XORPD: + case INST_XORPS: + case INST_PXOR: + + // ANDN Instructions. + case INST_PANDN: + + // SUB Instructions. + case INST_SUB: + case INST_PSUBB: + case INST_PSUBW: + case INST_PSUBD: + case INST_PSUBQ: + case INST_PSUBSB: + case INST_PSUBSW: + case INST_PSUBUSB: + case INST_PSUBUSW: + + // PCMPEQ Instructions. + case INST_PCMPEQB: + case INST_PCMPEQW: + case INST_PCMPEQD: + case INST_PCMPEQQ: + + // PCMPGT Instructions. + case INST_PCMPGTB: + case INST_PCMPGTW: + case INST_PCMPGTD: + case INST_PCMPGTQ: + // Clear the read flag. This prevents variable alloc/spill. + _variables[0].vflags = VARIABLE_ALLOC_WRITE; + _variables[0].vdata->registerReadCount--; + break; + } + } + cc._currentOffset++; + +#undef __GET_VARIABLE +} + +Emittable* EInstruction::translate(CompilerContext& cc) ASMJIT_NOTHROW +{ + uint32_t i; + uint32_t variablesCount = _variablesCount; + + if (variablesCount > 0) + { + // These variables are used by the instruction and we set current offset + // to their work offsets -> getSpillCandidate never return the variable + // used this instruction. + for (i = 0; i < variablesCount; i++) + { + _variables->vdata->workOffset = cc._currentOffset; + } + + // Alloc variables used by the instruction (special first). + for (i = 0; i < variablesCount; i++) + { + VarAllocRecord& r = _variables[i]; + // Alloc variables with specific register first. + if ((r.vflags & VARIABLE_ALLOC_SPECIAL) != 0) + cc.allocVar(r.vdata, r.regMask, r.vflags); + } + + for (i = 0; i < variablesCount; i++) + { + VarAllocRecord& r = _variables[i]; + // Alloc variables without specific register last. 
+ if ((r.vflags & VARIABLE_ALLOC_SPECIAL) == 0) + cc.allocVar(r.vdata, r.regMask, r.vflags); + } + + cc.translateOperands(_operands, _operandsCount); + } + + if (_memOp && (_memOp->getId() & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(_memOp->getId()); + ASMJIT_ASSERT(vdata != NULL); + + switch (vdata->state) + { + case VARIABLE_STATE_UNUSED: + vdata->state = VARIABLE_STATE_MEMORY; + break; + case VARIABLE_STATE_REGISTER: + vdata->changed = false; + cc.unuseVar(vdata, VARIABLE_STATE_MEMORY); + break; + } + } + + for (i = 0; i < variablesCount; i++) + { + cc._unuseVarOnEndOfScope(this, &_variables[i]); + } + + return translated(); +} + +void EInstruction::emit(Assembler& a) ASMJIT_NOTHROW +{ + a._comment = _comment; + a._emitOptions = _emitOptions; + + if (isSpecial()) + { + // ${SPECIAL_INSTRUCTION_HANDLING_BEGIN} + switch (_code) + { + case INST_CPUID: + a._emitInstruction(_code); + return; + + case INST_CBW: + case INST_CDQE: + case INST_CWDE: + a._emitInstruction(_code); + return; + + case INST_CMPXCHG: + a._emitInstruction(_code, &_operands[1], &_operands[2]); + return; + + case INST_CMPXCHG8B: +#if defined(ASMJIT_X64) + case INST_CMPXCHG16B: +#endif // ASMJIT_X64 + a._emitInstruction(_code, &_operands[4]); + return; + +#if defined(ASMJIT_X86) + case INST_DAA: + case INST_DAS: + a._emitInstruction(_code); + return; +#endif // ASMJIT_X86 + + case INST_IMUL: + case INST_MUL: + case INST_IDIV: + case INST_DIV: + // INST dst_lo (implicit), dst_hi (implicit), src (explicit) + ASMJIT_ASSERT(_operandsCount == 3); + a._emitInstruction(_code, &_operands[2]); + return; + + case INST_MOV_PTR: + break; + + case INST_LAHF: + case INST_SAHF: + a._emitInstruction(_code); + return; + + case INST_MASKMOVQ: + case INST_MASKMOVDQU: + a._emitInstruction(_code, &_operands[1], &_operands[2]); + return; + + case INST_ENTER: + case INST_LEAVE: + // TODO: SPECIAL INSTRUCTION. + break; + + case INST_RET: + // TODO: SPECIAL INSTRUCTION. 
+ break; + + case INST_MONITOR: + case INST_MWAIT: + // TODO: MONITOR/MWAIT (COMPILER). + break; + + case INST_POP: + case INST_POPAD: + case INST_POPFD: + case INST_POPFQ: + // TODO: SPECIAL INSTRUCTION. + break; + + case INST_PUSH: + case INST_PUSHAD: + case INST_PUSHFD: + case INST_PUSHFQ: + // TODO: SPECIAL INSTRUCTION. + break; + + case INST_RCL: + case INST_RCR: + case INST_ROL: + case INST_ROR: + case INST_SAL: + case INST_SAR: + case INST_SHL: + case INST_SHR: + a._emitInstruction(_code, &_operands[0], &cl); + return; + + case INST_SHLD: + case INST_SHRD: + a._emitInstruction(_code, &_operands[0], &_operands[1], &cl); + return; + + case INST_RDTSC: + case INST_RDTSCP: + a._emitInstruction(_code); + return; + + case INST_REP_LODSB: + case INST_REP_LODSD: + case INST_REP_LODSQ: + case INST_REP_LODSW: + case INST_REP_MOVSB: + case INST_REP_MOVSD: + case INST_REP_MOVSQ: + case INST_REP_MOVSW: + case INST_REP_STOSB: + case INST_REP_STOSD: + case INST_REP_STOSQ: + case INST_REP_STOSW: + case INST_REPE_CMPSB: + case INST_REPE_CMPSD: + case INST_REPE_CMPSQ: + case INST_REPE_CMPSW: + case INST_REPE_SCASB: + case INST_REPE_SCASD: + case INST_REPE_SCASQ: + case INST_REPE_SCASW: + case INST_REPNE_CMPSB: + case INST_REPNE_CMPSD: + case INST_REPNE_CMPSQ: + case INST_REPNE_CMPSW: + case INST_REPNE_SCASB: + case INST_REPNE_SCASD: + case INST_REPNE_SCASQ: + case INST_REPNE_SCASW: + a._emitInstruction(_code); + return; + + default: + ASMJIT_ASSERT(0); + } + // ${SPECIAL_INSTRUCTION_HANDLING_END} + } + + switch (_operandsCount) + { + case 0: + a._emitInstruction(_code); + break; + case 1: + a._emitInstruction(_code, &_operands[0]); + break; + case 2: + a._emitInstruction(_code, &_operands[0], &_operands[1]); + break; + case 3: + a._emitInstruction(_code, &_operands[0], &_operands[1], &_operands[2]); + break; + default: + ASMJIT_ASSERT(0); + break; + } +} + +int EInstruction::getMaxSize() const ASMJIT_NOTHROW +{ + // TODO: Do something more exact. 
+ return 15; +} + +bool EInstruction::_tryUnuseVar(VarData* v) ASMJIT_NOTHROW +{ + for (uint32_t i = 0; i < _variablesCount; i++) + { + if (_variables[i].vdata == v) + { + _variables[i].vflags |= VARIABLE_ALLOC_UNUSE_AFTER_USE; + return true; + } + } + + return false; +} + +ETarget* EInstruction::getJumpTarget() const ASMJIT_NOTHROW +{ + return NULL; +} + +// ============================================================================ +// [AsmJit::EJmp] +// ============================================================================ + +EJmp::EJmp(Compiler* c, uint32_t code, Operand* operandsData, uint32_t operandsCount) ASMJIT_NOTHROW : + EInstruction(c, code, operandsData, operandsCount) +{ + _jumpTarget = _compiler->_getTarget(_operands[0].getId()); + _jumpTarget->_jumpsCount++; + + _jumpNext = _jumpTarget->_from; + _jumpTarget->_from = this; + + // The 'jmp' is always taken, conditional jump can contain hint, we detect it. + _isTaken = (getCode() == INST_JMP) || + (operandsCount > 1 && + operandsData[1].isImm() && + reinterpret_cast<Imm*>(&operandsData[1])->getValue() == HINT_TAKEN); +} + +EJmp::~EJmp() ASMJIT_NOTHROW +{ +} + +void EJmp::prepare(CompilerContext& cc) ASMJIT_NOTHROW +{ + _offset = cc._currentOffset; + + // Update _isTaken to true if this is conditional backward jump. This behavior + // can be overriden by using HINT_NOT_TAKEN when using the instruction. + if (getCode() != INST_JMP && + _operandsCount == 1 && + _jumpTarget->getOffset() < getOffset()) + { + _isTaken = true; + } + + // Now patch all variables where jump location is in the active range. 
+ if (_jumpTarget->getOffset() != INVALID_VALUE && cc._active) + { + VarData* first = cc._active; + VarData* var = first; + uint32_t jumpOffset = _jumpTarget->getOffset(); + + do { + if (var->firstEmittable) + { + ASMJIT_ASSERT(var->lastEmittable != NULL); + uint32_t start = var->firstEmittable->getOffset(); + uint32_t end = var->lastEmittable->getOffset(); + + if (jumpOffset >= start && jumpOffset <= end) var->lastEmittable = this; + } + var = var->nextActive; + } while (var != first); + } + + cc._currentOffset++; +} + +Emittable* EJmp::translate(CompilerContext& cc) ASMJIT_NOTHROW +{ + // Translate using EInstruction. + Emittable* ret = EInstruction::translate(cc); + + // We jump with emittable if its INST_JUMP (not based on condiiton) and it + // points into yet unknown location. + if (_code == INST_JMP && !_jumpTarget->isTranslated()) + { + cc.addBackwardCode(this); + ret = _jumpTarget; + } + else + { + _state = cc._saveState(); + if (_jumpTarget->isTranslated()) + { + _doJump(cc); + } + else + { + // State is not known, so we need to call _doJump() later. Compiler will + // do it for us. + cc.addForwardJump(this); + _jumpTarget->_state = _state; + } + + // Mark next code as unrecheable, cleared by a next label (ETarget). + if (_code == INST_JMP) cc._unrecheable = 1; + } + + // Need to traverse over all active variables and unuse them if their scope ends + // here. + if (cc._active) + { + VarData* first = cc._active; + VarData* var = first; + + do { + cc._unuseVarOnEndOfScope(this, var); + var = var->nextActive; + } while (var != first); + } + + return ret; +} + +void EJmp::emit(Assembler& a) ASMJIT_NOTHROW +{ + static const uint MAXIMUM_SHORT_JMP_SIZE = 127; + + // Try to minimize size of jump using SHORT jump (8-bit displacement) by + // traversing into the target and calculating the maximum code size. We + // end when code size reaches MAXIMUM_SHORT_JMP_SIZE. 
+ if (!(_emitOptions & EMIT_OPTION_SHORT_JUMP) && getJumpTarget()->getOffset() > getOffset()) + { + // Calculate the code size. + uint codeSize = 0; + Emittable* cur = this->getNext(); + Emittable* target = getJumpTarget(); + + while (cur) + { + if (cur == target) + { + // Target found, we can tell assembler to generate short form of jump. + _emitOptions |= EMIT_OPTION_SHORT_JUMP; + goto end; + } + + int s = cur->getMaxSize(); + if (s == -1) break; + + codeSize += (uint)s; + if (codeSize > MAXIMUM_SHORT_JMP_SIZE) break; + + cur = cur->getNext(); + } + } + +end: + EInstruction::emit(a); +} + +void EJmp::_doJump(CompilerContext& cc) ASMJIT_NOTHROW +{ + // The state have to be already known. The _doJump() method is called by + // translate() or by Compiler in case that it's forward jump. + ASMJIT_ASSERT(_jumpTarget->getState()); + + if (getCode() == INST_JMP || (isTaken() && _jumpTarget->getOffset() < getOffset())) + { + // Instruction type is JMP or conditional jump that should be taken (likely). + // We can set state here instead of jumping out, setting state and jumping + // to _jumpTarget. + // + // NOTE: We can't use this technique if instruction is forward conditional + // jump. The reason is that when generating code we can't change state here, + // because next instruction depends to it. + cc._restoreState(_jumpTarget->getState(), _jumpTarget->getOffset()); + } + else + { + // Instruction type is JMP or conditional jump that should be not normally + // taken. If we need add code that will switch between different states we + // add it after the end of function body (after epilog, using 'ExtraBlock'). + Compiler* compiler = cc.getCompiler(); + + Emittable* ext = cc.getExtraBlock(); + Emittable* old = compiler->setCurrentEmittable(ext); + + cc._restoreState(_jumpTarget->getState(), _jumpTarget->getOffset()); + + if (compiler->getCurrentEmittable() != ext) + { + // Add the jump to the target. 
+ compiler->jmp(_jumpTarget->_label); + ext = compiler->getCurrentEmittable(); + + // The cc._restoreState() method emitted some instructions so we need to + // patch the jump. + Label L = compiler->newLabel(); + compiler->setCurrentEmittable(cc.getExtraBlock()); + compiler->bind(L); + + // Finally, patch the jump target. + ASMJIT_ASSERT(_operandsCount > 0); + _operands[0] = L; // Operand part (Label). + _jumpTarget = compiler->_getTarget(L.getId()); // Emittable part (ETarget). + } + + cc.setExtraBlock(ext); + compiler->setCurrentEmittable(old); + + // Assign state back. + cc._assignState(_state); + } +} + +ETarget* EJmp::getJumpTarget() const ASMJIT_NOTHROW +{ + return _jumpTarget; +} + +// ============================================================================ +// [AsmJit::EFunction] +// ============================================================================ + +EFunction::EFunction(Compiler* c) ASMJIT_NOTHROW : Emittable(c, EMITTABLE_FUNCTION) +{ + _argumentVariables = NULL; + Util::memset32(_hints, INVALID_VALUE, ASMJIT_ARRAY_SIZE(_hints)); + + // Stack is always aligned to 16-bytes when using 64-bit OS. + _isStackAlignedByOsTo16Bytes = CompilerUtil::isStack16ByteAligned(); + + // Manual aligning is autodetected by prepare() method. + _isStackAlignedByFnTo16Bytes = false; + + // Just clear to safe defaults. 
+ _isNaked = false; + _isEspAdjusted = false; + _isCaller = false; + + _pePushPop = true; + _emitEMMS = false; + _emitSFence = false; + _emitLFence = false; + + _finished = false; + + _modifiedAndPreservedGP = 0; + _modifiedAndPreservedMM = 0; + _modifiedAndPreservedXMM = 0; + + _pePushPopStackSize = 0; + _peMovStackSize = 0; + _peAdjustStackSize = 0; + + _memStackSize = 0; + _memStackSize16 = 0; + + _functionCallStackSize = 0; + + _entryLabel = c->newLabel(); + _exitLabel = c->newLabel(); + + _prolog = Compiler_newObject<EProlog>(c, this); + _epilog = Compiler_newObject<EEpilog>(c, this); + _end = Compiler_newObject<EFunctionEnd>(c); +} + +EFunction::~EFunction() ASMJIT_NOTHROW +{ +} + +void EFunction::prepare(CompilerContext& cc) ASMJIT_NOTHROW +{ + _offset = cc._currentOffset++; +} + +int EFunction::getMaxSize() const ASMJIT_NOTHROW +{ + // EFunction is NOP. + return 0; +} + +void EFunction::setPrototype( + uint32_t callingConvention, + const uint32_t* arguments, + uint32_t argumentsCount, + uint32_t returnValue) ASMJIT_NOTHROW +{ + _functionPrototype.setPrototype(callingConvention, arguments, argumentsCount, returnValue); +} + +void EFunction::setHint(uint32_t hint, uint32_t value) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(hint < ASMJIT_ARRAY_SIZE(_hints)); + _hints[hint] = value; +} + +void EFunction::_createVariables() ASMJIT_NOTHROW +{ + uint32_t i, count = _functionPrototype.getArgumentsCount(); + if (count == 0) return; + + _argumentVariables = reinterpret_cast<VarData**>(_compiler->getZone().zalloc(count * sizeof(VarData*))); + if (_argumentVariables == NULL) + { + _compiler->setError(ERROR_NO_HEAP_MEMORY); + return; + } + + char argNameStorage[64]; + char* argName = NULL; + + bool debug = _compiler->getLogger() != NULL; + if (debug) argName = argNameStorage; + + for (i = 0; i < count; i++) + { + FunctionPrototype::Argument& a = _functionPrototype.getArguments()[i]; + if (debug) snprintf(argName, ASMJIT_ARRAY_SIZE(argNameStorage), "arg_%u", i); + + uint32_t size 
= getVariableSize(a.variableType); + VarData* vdata = _compiler->_newVarData(argName, a.variableType, size); + + if (a.registerIndex != (uint32_t)INVALID_VALUE) + { + vdata->isRegArgument = true; + vdata->registerIndex = a.registerIndex; + } + + if (a.stackOffset != (int32_t)INVALID_VALUE) + { + vdata->isMemArgument = true; + vdata->homeMemoryOffset = a.stackOffset; + } + + _argumentVariables[i] = vdata; + } +} + +void EFunction::_prepareVariables(Emittable* first) ASMJIT_NOTHROW +{ + uint32_t i, count = _functionPrototype.getArgumentsCount(); + if (count == 0) return; + + for (i = 0; i < count; i++) + { + VarData* vdata = _argumentVariables[i]; + + // This is where variable scope starts. + vdata->firstEmittable = first; + // If this will not be changed then it will be deallocated immediately. + vdata->lastEmittable = first; + } +} + +void EFunction::_allocVariables(CompilerContext& cc) ASMJIT_NOTHROW +{ + uint32_t i, count = _functionPrototype.getArgumentsCount(); + if (count == 0) return; + + for (i = 0; i < count; i++) + { + VarData* vdata = _argumentVariables[i]; + + if (vdata->firstEmittable != NULL || + vdata->isRegArgument || + vdata->isMemArgument) + { + // Variable is used. + if (vdata->registerIndex != INVALID_VALUE) + { + vdata->state = VARIABLE_STATE_REGISTER; + // If variable is in register -> mark it as changed so it will not be + // lost by first spill. + vdata->changed = true; + cc._allocatedVariable(vdata); + } + else if (vdata->isMemArgument) + { + vdata->state = VARIABLE_STATE_MEMORY; + } + } + else + { + // Variable is not used. 
+ vdata->registerIndex = INVALID_VALUE; + } + } +} + +void EFunction::_preparePrologEpilog(CompilerContext& cc) ASMJIT_NOTHROW +{ + const CpuInfo* cpuInfo = getCpuInfo(); + + _pePushPop = true; + _emitEMMS = false; + _emitSFence = false; + _emitLFence = false; + + uint32_t accessibleMemoryBelowStack = 0; + if (_functionPrototype.getCallingConvention() == CALL_CONV_X64U) + accessibleMemoryBelowStack = 128; + + if (_isCaller && (cc._memBytesTotal > 0 || _isStackAlignedByOsTo16Bytes)) + _isEspAdjusted = true; + + if (cc._memBytesTotal > accessibleMemoryBelowStack) + _isEspAdjusted = true; + + if (_hints[FUNCTION_HINT_NAKED] != INVALID_VALUE) + _isNaked = (bool)_hints[FUNCTION_HINT_NAKED]; + + if (_hints[FUNCTION_HINT_PUSH_POP_SEQUENCE] != INVALID_VALUE) + _pePushPop = (bool)_hints[FUNCTION_HINT_PUSH_POP_SEQUENCE]; + + if (_hints[FUNCTION_HINT_EMMS] != INVALID_VALUE) + _emitEMMS = (bool)_hints[FUNCTION_HINT_EMMS]; + + if (_hints[FUNCTION_HINT_SFENCE] != INVALID_VALUE) + _emitSFence = (bool)_hints[FUNCTION_HINT_SFENCE]; + + if (_hints[FUNCTION_HINT_LFENCE] != INVALID_VALUE) + _emitLFence = (bool)_hints[FUNCTION_HINT_LFENCE]; + + if (!_isStackAlignedByOsTo16Bytes && !_isNaked && (cc._mem16BlocksCount > 0)) + { + // Have to align stack to 16-bytes. + _isStackAlignedByFnTo16Bytes = true; + _isEspAdjusted = true; + } + + _modifiedAndPreservedGP = cc._modifiedGPRegisters & _functionPrototype.getPreservedGP() & ~Util::maskFromIndex(REG_INDEX_ESP); + _modifiedAndPreservedMM = cc._modifiedMMRegisters & _functionPrototype.getPreservedMM(); + _modifiedAndPreservedXMM = cc._modifiedXMMRegisters & _functionPrototype.getPreservedXMM(); + + _movDqaInstruction = (_isStackAlignedByOsTo16Bytes || !_isNaked) ? INST_MOVDQA : INST_MOVDQU; + + // Prolog & Epilog stack size. 
+ { + int32_t memGP = Util::bitCount(_modifiedAndPreservedGP) * sizeof(sysint_t); + int32_t memMM = Util::bitCount(_modifiedAndPreservedMM) * 8; + int32_t memXMM = Util::bitCount(_modifiedAndPreservedXMM) * 16; + + if (_pePushPop) + { + _pePushPopStackSize = memGP; + _peMovStackSize = memXMM + Util::alignTo16(memMM); + } + else + { + _pePushPopStackSize = 0; + _peMovStackSize = memXMM + Util::alignTo16(memMM + memGP); + } + } + + if (_isStackAlignedByFnTo16Bytes) + { + _peAdjustStackSize += Util::deltaTo16(_pePushPopStackSize); + } + else + { + int32_t v = 16 - sizeof(sysint_t); + if (!_isNaked) v -= sizeof(sysint_t); + + v -= _pePushPopStackSize & 15; + if (v < 0) v += 16; + _peAdjustStackSize = v; + + //_peAdjustStackSize += Util::deltaTo16(_pePushPopStackSize + v); + } + + // Memory stack size. + _memStackSize = cc._memBytesTotal; + _memStackSize16 = Util::alignTo16(_memStackSize); + + if (_isNaked) + { + cc._argumentsBaseReg = REG_INDEX_ESP; + cc._argumentsBaseOffset = (_isEspAdjusted) + ? (_functionCallStackSize + _memStackSize16 + _peMovStackSize + _pePushPopStackSize + _peAdjustStackSize) + : (_pePushPopStackSize); + } + else + { + cc._argumentsBaseReg = REG_INDEX_EBP; + cc._argumentsBaseOffset = sizeof(sysint_t); + } + + cc._variablesBaseReg = REG_INDEX_ESP; + cc._variablesBaseOffset = _functionCallStackSize; + if (!_isEspAdjusted) + cc._variablesBaseOffset = -_memStackSize16 - _peMovStackSize - _peAdjustStackSize; +} + +void EFunction::_dumpFunction(CompilerContext& cc) ASMJIT_NOTHROW +{ + Logger* logger = _compiler->getLogger(); + ASMJIT_ASSERT(logger != NULL); + + uint32_t i; + char _buf[1024]; + char* p; + + // Log function prototype. 
+ {
+ uint32_t argumentsCount = _functionPrototype.getArgumentsCount();
+ bool first = true;
+
+ logger->logString("; Function Prototype:\n");
+ logger->logString(";\n");
+
+ for (i = 0; i < argumentsCount; i++)
+ {
+ const FunctionPrototype::Argument& a = _functionPrototype.getArguments()[i];
+ VarData* vdata = _argumentVariables[i];
+
+ if (first)
+ {
+ logger->logString("; IDX| Type | Sz | Home |\n");
+ logger->logString("; ---+----------+----+----------------+\n");
+ }
+
+ char* memHome = memHome = _buf;
+
+ if (a.registerIndex != INVALID_VALUE)
+ {
+ BaseReg regOp(a.registerIndex | REG_TYPE_GPN, 0);
+ dumpOperand(memHome, &regOp)[0] = '\0';
+ }
+ else
+ {
+ Mem memOp;
+ memOp._mem.base = REG_INDEX_ESP;
+ memOp._mem.displacement = a.stackOffset;
+ dumpOperand(memHome, &memOp)[0] = '\0';
+ }
+
+ logger->logFormat("; %-3u| %-9s| %-3u| %-15s|\n",
+ // Argument index.
+ i,
+ // Argument type.
+ vdata->type < _VARIABLE_TYPE_COUNT ? variableInfo[vdata->type].name : "invalid",
+ // Argument size.
+ vdata->size,
+ // Argument memory home.
+ memHome
+ );
+
+ first = false;
+ }
+ logger->logString(";\n");
+ }
+
+ // Log variables.
+ {
+ uint32_t variablesCount = (uint32_t)_compiler->_varData.getLength();
+ bool first = true;
+
+ logger->logString("; Variables:\n");
+ logger->logString(";\n");
+
+ for (i = 0; i < variablesCount; i++)
+ {
+ VarData* vdata = _compiler->_varData[i];
+
+ // If this variable is not related to this function then skip it.
+ if (vdata->scope != this) continue;
+
+ // Get some information about variable type. 
+ const VariableInfo& vinfo = variableInfo[vdata->type]; + + if (first) + { + logger->logString("; ID | Type | Sz | Home | Register Access | Memory Access |\n"); + logger->logString("; ---+----------+----+----------------+-------------------+-------------------+\n"); + } + + char* memHome = (char*)"[None]"; + if (vdata->homeMemoryData != NULL) + { + VarMemBlock* memBlock = reinterpret_cast<VarMemBlock*>(vdata->homeMemoryData); + memHome = _buf; + + Mem memOp; + if (vdata->isMemArgument) + { + const FunctionPrototype::Argument& a = _functionPrototype.getArguments()[i]; + + memOp._mem.base = cc._argumentsBaseReg; + memOp._mem.displacement += cc._argumentsBaseOffset; + memOp._mem.displacement += a.stackOffset; + } + else + { + memOp._mem.base = cc._variablesBaseReg; + memOp._mem.displacement += cc._variablesBaseOffset; + memOp._mem.displacement += memBlock->offset; + } + dumpOperand(memHome, &memOp)[0] = '\0'; + } + + logger->logFormat("; %-3u| %-9s| %-3u| %-15s| r=%-4uw=%-4ux=%-4u| r=%-4uw=%-4ux=%-4u|\n", + // Variable id. + (uint)(i & OPERAND_ID_VALUE_MASK), + // Variable type. + vdata->type < _VARIABLE_TYPE_COUNT ? vinfo.name : "invalid", + // Variable size. + vdata->size, + // Variable memory home. + memHome, + // Register access count. + (unsigned int)vdata->registerReadCount, + (unsigned int)vdata->registerWriteCount, + (unsigned int)vdata->registerRWCount, + // Memory access count. + (unsigned int)vdata->memoryReadCount, + (unsigned int)vdata->memoryWriteCount, + (unsigned int)vdata->memoryRWCount + ); + first = false; + } + logger->logString(";\n"); + } + + // Log modified registers. 
+ { + p = _buf; + + uint32_t r; + uint32_t modifiedRegisters = 0; + + for (r = 0; r < 3; r++) + { + bool first = true; + uint32_t regs; + uint32_t type; + + switch (r) + { + case 0: + regs = cc._modifiedGPRegisters; + type = REG_TYPE_GPN; + p = Util::mycpy(p, "; GP : "); + break; + case 1: + regs = cc._modifiedMMRegisters; + type = REG_TYPE_MM; + p = Util::mycpy(p, "; MM : "); + break; + case 2: + regs = cc._modifiedXMMRegisters; + type = REG_TYPE_XMM; + p = Util::mycpy(p, "; XMM: "); + break; + default: + ASMJIT_ASSERT(0); + } + + for (i = 0; i < REG_NUM_BASE; i++) + { + if ((regs & Util::maskFromIndex(i)) != 0) + { + if (!first) { *p++ = ','; *p++ = ' '; } + p = dumpRegister(p, type, i); + first = false; + modifiedRegisters++; + } + } + *p++ = '\n'; + } + *p = '\0'; + + logger->logFormat("; Modified registers (%u):\n", (unsigned int)modifiedRegisters); + logger->logString(_buf); + } + + logger->logString("\n"); +} + +void EFunction::_emitProlog(CompilerContext& cc) ASMJIT_NOTHROW +{ + uint32_t i, mask; + uint32_t preservedGP = _modifiedAndPreservedGP; + uint32_t preservedMM = _modifiedAndPreservedMM; + uint32_t preservedXMM = _modifiedAndPreservedXMM; + + int32_t stackSubtract = + _functionCallStackSize + + _memStackSize16 + + _peMovStackSize + + _peAdjustStackSize; + int32_t nspPos; + + if (_compiler->getLogger()) + { + // Here function prolog starts. + _compiler->comment("Prolog"); + } + + // Emit standard prolog entry code (but don't do it if function is set to be + // naked). + // + // Also see the _prologEpilogStackAdjust variable. If function is naked (so + // prolog and epilog will not contain "push ebp" and "mov ebp, esp", we need + // to adjust stack by 8 bytes in 64-bit mode (this will give us that stack + // will remain aligned to 16 bytes). + if (!_isNaked) + { + _compiler->emit(INST_PUSH, nbp); + _compiler->emit(INST_MOV, nbp, nsp); + } + + // Align manually stack-pointer to 16-bytes. 
+ if (_isStackAlignedByFnTo16Bytes) + { + ASMJIT_ASSERT(!_isNaked); + _compiler->emit(INST_AND, nsp, imm(-16)); + } + + // Save GP registers using PUSH/POP. + if (preservedGP && _pePushPop) + { + for (i = 0, mask = 1; i < REG_NUM_GP; i++, mask <<= 1) + { + if (preservedGP & mask) _compiler->emit(INST_PUSH, gpn(i)); + } + } + + if (_isEspAdjusted) + { + nspPos = _memStackSize16; + if (stackSubtract) _compiler->emit(INST_SUB, nsp, imm(stackSubtract)); + } + else + { + nspPos = -(_peMovStackSize + _peAdjustStackSize); + //if (_pePushPop) nspPos += Util::bitCount(preservedGP) * sizeof(sysint_t); + } + + // Save XMM registers using MOVDQA/MOVDQU. + if (preservedXMM) + { + for (i = 0, mask = 1; i < REG_NUM_XMM; i++, mask <<= 1) + { + if (preservedXMM & mask) + { + _compiler->emit(_movDqaInstruction, dqword_ptr(nsp, nspPos), xmm(i)); + nspPos += 16; + } + } + } + + // Save MM registers using MOVQ. + if (preservedMM) + { + for (i = 0, mask = 1; i < 8; i++, mask <<= 1) + { + if (preservedMM & mask) + { + _compiler->emit(INST_MOVQ, qword_ptr(nsp, nspPos), mm(i)); + nspPos += 8; + } + } + } + + // Save GP registers using MOV. + if (preservedGP && !_pePushPop) + { + for (i = 0, mask = 1; i < REG_NUM_GP; i++, mask <<= 1) + { + if (preservedGP & mask) + { + _compiler->emit(INST_MOV, sysint_ptr(nsp, nspPos), gpn(i)); + nspPos += sizeof(sysint_t); + } + } + } + + if (_compiler->getLogger()) + { + _compiler->comment("Body"); + } +} + +void EFunction::_emitEpilog(CompilerContext& cc) ASMJIT_NOTHROW +{ + const CpuInfo* cpuInfo = getCpuInfo(); + + uint32_t i, mask; + uint32_t preservedGP = _modifiedAndPreservedGP; + uint32_t preservedMM = _modifiedAndPreservedMM; + uint32_t preservedXMM = _modifiedAndPreservedXMM; + + int32_t stackAdd = + _functionCallStackSize + + _memStackSize16 + + _peMovStackSize + + _peAdjustStackSize; + int32_t nspPos; + + nspPos = (_isEspAdjusted) + ? 
(_memStackSize16) + : -(_peMovStackSize + _peAdjustStackSize); + + if (_compiler->getLogger()) + { + _compiler->comment("Epilog"); + } + + // Restore XMM registers using MOVDQA/MOVDQU. + if (preservedXMM) + { + for (i = 0, mask = 1; i < REG_NUM_XMM; i++, mask <<= 1) + { + if (preservedXMM & mask) + { + _compiler->emit(_movDqaInstruction, xmm(i), dqword_ptr(nsp, nspPos)); + nspPos += 16; + } + } + } + + // Restore MM registers using MOVQ. + if (preservedMM) + { + for (i = 0, mask = 1; i < 8; i++, mask <<= 1) + { + if (preservedMM & mask) + { + _compiler->emit(INST_MOVQ, mm(i), qword_ptr(nsp, nspPos)); + nspPos += 8; + } + } + } + + // Restore GP registers using MOV. + if (preservedGP && !_pePushPop) + { + for (i = 0, mask = 1; i < REG_NUM_GP; i++, mask <<= 1) + { + if (preservedGP & mask) + { + _compiler->emit(INST_MOV, gpn(i), sysint_ptr(nsp, nspPos)); + nspPos += sizeof(sysint_t); + } + } + } + + if (_isEspAdjusted && stackAdd != 0) + _compiler->emit(INST_ADD, nsp, imm(stackAdd)); + + // Restore GP registers using POP. + if (preservedGP && _pePushPop) + { + for (i = REG_NUM_GP - 1, mask = 1 << i; (int32_t)i >= 0; i--, mask >>= 1) + { + if (preservedGP & mask) + { + _compiler->emit(INST_POP, gpn(i)); + } + } + } + + // Emit Emms. + if (_emitEMMS) _compiler->emit(INST_EMMS); + + // Emit SFence / LFence / MFence. + if ( _emitSFence && _emitLFence) _compiler->emit(INST_MFENCE); // MFence == SFence & LFence. + if ( _emitSFence && !_emitLFence) _compiler->emit(INST_SFENCE); // Only SFence. + if (!_emitSFence && _emitLFence) _compiler->emit(INST_LFENCE); // Only LFence. + + // Emit standard epilog leave code (if needed). + if (!_isNaked) + { + if (cpuInfo->vendorId == CPU_VENDOR_AMD) + { + // AMD seems to prefer LEAVE instead of MOV/POP sequence. + _compiler->emit(INST_LEAVE); + } + else + { + _compiler->emit(INST_MOV, nsp, nbp); + _compiler->emit(INST_POP, nbp); + } + } + + // Emit return using correct instruction. 
+ if (_functionPrototype.getCalleePopsStack()) + _compiler->emit(INST_RET, imm((int16_t)_functionPrototype.getArgumentsStackSize())); + else + _compiler->emit(INST_RET); +} + +void EFunction::reserveStackForFunctionCall(int32_t size) +{ + size = Util::alignTo16(size); + + if (size > _functionCallStackSize) _functionCallStackSize = size; + _isCaller = true; +} + +// ============================================================================ +// [AsmJit::EProlog] +// ============================================================================ + +EProlog::EProlog(Compiler* c, EFunction* f) ASMJIT_NOTHROW : + Emittable(c, EMITTABLE_PROLOG), + _function(f) +{ +} + +EProlog::~EProlog() ASMJIT_NOTHROW +{ +} + +void EProlog::prepare(CompilerContext& cc) ASMJIT_NOTHROW +{ + _offset = cc._currentOffset++; + _function->_prepareVariables(this); +} + +Emittable* EProlog::translate(CompilerContext& cc) ASMJIT_NOTHROW +{ + _function->_allocVariables(cc); + return translated(); +} + +// ============================================================================ +// [AsmJit::EEpilog] +// ============================================================================ + +EEpilog::EEpilog(Compiler* c, EFunction* f) ASMJIT_NOTHROW : + Emittable(c, EMITTABLE_EPILOG), + _function(f) +{ +} + +EEpilog::~EEpilog() ASMJIT_NOTHROW +{ +} + +void EEpilog::prepare(CompilerContext& cc) ASMJIT_NOTHROW +{ + _offset = cc._currentOffset++; +} + +Emittable* EEpilog::translate(CompilerContext& cc) ASMJIT_NOTHROW +{ + return translated(); +} + +// ============================================================================ +// [AsmJit::ECall] +// ============================================================================ + +ECall::ECall(Compiler* c, EFunction* caller, const Operand* target) ASMJIT_NOTHROW : + Emittable(c, EMITTABLE_CALL), + _caller(caller), + _target(*target), + _args(NULL), + _gpParams(0), + _mmParams(0), + _xmmParams(0), + _variablesCount(0), + _variables(NULL) +{ +} + 
+ECall::~ECall() ASMJIT_NOTHROW +{ + memset(_argumentToVarRecord, 0, sizeof(VarCallRecord*) * FUNC_MAX_ARGS); +} + +void ECall::prepare(CompilerContext& cc) ASMJIT_NOTHROW +{ + // Prepare is similar to EInstruction::prepare(). We collect unique variables + // and update statistics, but we don't use standard alloc/free register calls. + // + // The calling function is also unique in variable allocator point of view, + // because we need to alloc some variables that may be destroyed be the + // callee (okay, may not, but this is not guaranteed). + _offset = cc._currentOffset; + + // Tell EFunction that another function will be called inside. It needs this + // information to reserve stack for the call and to mark esp adjustable. + getCaller()->reserveStackForFunctionCall( + (int32_t)getPrototype().getArgumentsStackSize()); + + uint32_t i; + uint32_t argumentsCount = getPrototype().getArgumentsCount(); + uint32_t operandsCount = argumentsCount; + uint32_t variablesCount = 0; + + // Create registers used as arguments mask. + for (i = 0; i < argumentsCount; i++) + { + const FunctionPrototype::Argument& fArg = getPrototype().getArguments()[i]; + + if (fArg.registerIndex != INVALID_VALUE) + { + switch (fArg.variableType) + { + case VARIABLE_TYPE_GPD: + case VARIABLE_TYPE_GPQ: + _gpParams |= Util::maskFromIndex(fArg.registerIndex); + break; + case VARIABLE_TYPE_MM: + _mmParams |= Util::maskFromIndex(fArg.registerIndex); + break; + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + _xmmParams |= Util::maskFromIndex(fArg.registerIndex); + break; + default: + ASMJIT_ASSERT(0); + } + } + else + { + cc.getFunction()->mustAdjustEsp(); + } + } + + // Call address. + operandsCount++; + + // The first and the second return value. 
+ if (!_ret[0].isNone()) operandsCount++; + if (!_ret[1].isNone()) operandsCount++; + +#define __GET_VARIABLE(__vardata__) \ + { \ + VarData* _candidate = __vardata__; \ + \ + for (var = cur; ;) \ + { \ + if (var == _variables) \ + { \ + var = cur++; \ + var->vdata = _candidate; \ + break; \ + } \ + \ + var--; \ + \ + if (var->vdata == _candidate) \ + { \ + break; \ + } \ + } \ + \ + ASMJIT_ASSERT(var != NULL); \ + } + + for (i = 0; i < operandsCount; i++) + { + Operand& o = (i < argumentsCount) + ? (_args[i]) + : (i == argumentsCount ? _target : _ret[i - argumentsCount - 1]); + + if (o.isVar()) + { + ASMJIT_ASSERT(o.getId() != INVALID_VALUE); + VarData* vdata = _compiler->_getVarData(o.getId()); + ASMJIT_ASSERT(vdata != NULL); + + if (vdata->workOffset == _offset) continue; + if (!cc._isActive(vdata)) cc._addActive(vdata); + + vdata->workOffset = _offset; + variablesCount++; + } + else if (o.isMem()) + { + if ((o.getId() & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(o.getId()); + ASMJIT_ASSERT(vdata != NULL); + + cc._markMemoryUsed(vdata); + if (!cc._isActive(vdata)) cc._addActive(vdata); + + continue; + } + else if ((o._mem.base & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(o._mem.base); + ASMJIT_ASSERT(vdata != NULL); + + if (vdata->workOffset == _offset) continue; + if (!cc._isActive(vdata)) cc._addActive(vdata); + + vdata->workOffset = _offset; + variablesCount++; + } + + if ((o._mem.index & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(o._mem.index); + ASMJIT_ASSERT(vdata != NULL); + + if (vdata->workOffset == _offset) continue; + if (!cc._isActive(vdata)) cc._addActive(vdata); + + vdata->workOffset = _offset; + variablesCount++; + } + } + } + + // Traverse all active variables and set their firstCallable pointer to this + // call. 
This information can be used to choose between the preserved-first + // and preserved-last register allocation. + if (cc._active) + { + VarData* first = cc._active; + VarData* active = first; + do { + if (active->firstCallable == NULL) active->firstCallable = this; + active = active->nextActive; + } while (active != first); + } + + if (!variablesCount) + { + cc._currentOffset++; + return; + } + + _variables = reinterpret_cast<VarCallRecord*>(_compiler->getZone().zalloc(sizeof(VarCallRecord) * variablesCount)); + if (!_variables) + { + _compiler->setError(ERROR_NO_HEAP_MEMORY); + cc._currentOffset++; + return; + } + + _variablesCount = variablesCount; + memset(_variables, 0, sizeof(VarCallRecord) * variablesCount); + + VarCallRecord* cur = _variables; + VarCallRecord* var = NULL; + + for (i = 0; i < operandsCount; i++) + { + Operand& o = (i < argumentsCount) + ? (_args[i]) + : (i == argumentsCount ? _target : _ret[i - argumentsCount - 1]); + + if (o.isVar()) + { + VarData* vdata = _compiler->_getVarData(o.getId()); + ASMJIT_ASSERT(vdata != NULL); + + __GET_VARIABLE(vdata) + _argumentToVarRecord[i] = var; + + if (i < argumentsCount) + { + const FunctionPrototype::Argument& fArg = getPrototype().getArguments()[i]; + + if (fArg.registerIndex != INVALID_VALUE) + { + cc._newRegisterHomeIndex(vdata, fArg.registerIndex); + + switch (fArg.variableType) + { + case VARIABLE_TYPE_GPD: + case VARIABLE_TYPE_GPQ: + var->flags |= VarCallRecord::FLAG_IN_GP; + var->inCount++; + break; + + case VARIABLE_TYPE_MM: + var->flags |= VarCallRecord::FLAG_IN_MM; + var->inCount++; + break; + + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + var->flags |= VarCallRecord::FLAG_IN_XMM; + var->inCount++; + break; + + default: + ASMJIT_ASSERT(0); + } + } + else + { + var->inCount++; + } + + vdata->registerReadCount++; + } + else if (i == argumentsCount) + { + uint32_t mask = 
~getPrototype().getPreservedGP() & + ~getPrototype().getPassedGP() & + Util::maskUpToIndex(REG_NUM_GP); + + cc._newRegisterHomeIndex(vdata, Util::findFirstBit(mask)); + cc._newRegisterHomeMask(vdata, mask); + + var->flags |= VarCallRecord::FLAG_CALL_OPERAND_REG; + vdata->registerReadCount++; + } + else + { + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + case VARIABLE_TYPE_GPQ: + if (i == argumentsCount+1) + var->flags |= VarCallRecord::FLAG_OUT_EAX; + else + var->flags |= VarCallRecord::FLAG_OUT_EDX; + break; + + case VARIABLE_TYPE_X87: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_X87_1D: +#if defined(ASMJIT_X86) + if (i == argumentsCount+1) + var->flags |= VarCallRecord::FLAG_OUT_ST0; + else + var->flags |= VarCallRecord::FLAG_OUT_ST1; +#else + if (i == argumentsCount+1) + var->flags |= VarCallRecord::FLAG_OUT_XMM0; + else + var->flags |= VarCallRecord::FLAG_OUT_XMM1; +#endif + break; + + case VARIABLE_TYPE_MM: + var->flags |= VarCallRecord::FLAG_OUT_MM0; + break; + + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_2D: + if (i == argumentsCount+1) + var->flags |= VarCallRecord::FLAG_OUT_XMM0; + else + var->flags |= VarCallRecord::FLAG_OUT_XMM1; + break; + + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_1D: +#if defined(ASMJIT_X86) + if (i == argumentsCount+1) + var->flags |= VarCallRecord::FLAG_OUT_ST0; + else + var->flags |= VarCallRecord::FLAG_OUT_ST1; +#else + if (i == argumentsCount+1) + var->flags |= VarCallRecord::FLAG_OUT_XMM0; + else + var->flags |= VarCallRecord::FLAG_OUT_XMM1; +#endif + break; + + default: + ASMJIT_ASSERT(0); + } + + vdata->registerWriteCount++; + } + } + else if (o.isMem()) + { + ASMJIT_ASSERT(i == argumentsCount); + + if ((o.getId() & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(o.getId()); + ASMJIT_ASSERT(vdata != NULL); + + vdata->memoryReadCount++; + } + else if ((o._mem.base & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* 
vdata = _compiler->_getVarData(reinterpret_cast<Mem&>(o).getBase()); + ASMJIT_ASSERT(vdata != NULL); + + vdata->registerReadCount++; + + __GET_VARIABLE(vdata) + var->flags |= VarCallRecord::FLAG_CALL_OPERAND_REG | VarCallRecord::FLAG_CALL_OPERAND_MEM; + } + + if ((o._mem.index & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(reinterpret_cast<Mem&>(o).getIndex()); + ASMJIT_ASSERT(vdata != NULL); + + vdata->registerReadCount++; + + __GET_VARIABLE(vdata) + var->flags |= VarCallRecord::FLAG_CALL_OPERAND_REG | VarCallRecord::FLAG_CALL_OPERAND_MEM; + } + } + } + + // Traverse all variables and update firstEmittable / lastEmittable. This + // function is called from iterator that scans emittables using forward + // direction so we can use this knowledge to optimize the process. + // + // Same code is in EInstruction::prepare(). + for (i = 0; i < _variablesCount; i++) + { + VarData* v = _variables[i].vdata; + + // First emittable (begin of variable scope). + if (v->firstEmittable == NULL) v->firstEmittable = this; + + // Last emittable (end of variable scope). + v->lastEmittable = this; + } + + cc._currentOffset++; + +#undef __GET_VARIABLE +} + +Emittable* ECall::translate(CompilerContext& cc) ASMJIT_NOTHROW +{ + uint32_t i; + uint32_t preserved, mask; + + uint32_t temporaryGpReg; + uint32_t temporaryXmmReg; + + uint32_t offset = cc._currentOffset; + Compiler* compiler = cc.getCompiler(); + + // Constants. + const FunctionPrototype::Argument* targs = getPrototype().getArguments(); + + uint32_t argumentsCount = getPrototype().getArgumentsCount(); + uint32_t variablesCount = _variablesCount; + + // Processed arguments. + uint8_t processed[FUNC_MAX_ARGS] = { 0 }; + + compiler->comment("Function Call"); + + // These variables are used by the instruction and we set current offset + // to their work offsets -> The getSpillCandidate() method never returns + // the variable used by this instruction. 
+ for (i = 0; i < variablesCount; i++) + { + _variables[i].vdata->workOffset = offset; + + // Init back-reference to VarCallRecord. + _variables[i].vdata->tempPtr = &_variables[i]; + } + + // -------------------------------------------------------------------------- + // STEP 1: + // + // Spill variables which are not used by the function call and have to + // be destroyed. These registers may be used by callee. + // -------------------------------------------------------------------------- + + preserved = getPrototype().getPreservedGP(); + for (i = 0, mask = 1; i < REG_NUM_GP; i++, mask <<= 1) + { + VarData* vdata = cc._state.gp[i]; + if (vdata && vdata->workOffset != offset && (preserved & mask) == 0) + { + cc.spillGPVar(vdata); + } + } + + preserved = getPrototype().getPreservedMM(); + for (i = 0, mask = 1; i < REG_NUM_MM; i++, mask <<= 1) + { + VarData* vdata = cc._state.mm[i]; + if (vdata && vdata->workOffset != offset && (preserved & mask) == 0) + { + cc.spillMMVar(vdata); + } + } + + preserved = getPrototype().getPreservedXMM(); + for (i = 0, mask = 1; i < REG_NUM_XMM; i++, mask <<= 1) + { + VarData* vdata = cc._state.xmm[i]; + if (vdata && vdata->workOffset != offset && (preserved & mask) == 0) + { + cc.spillXMMVar(vdata); + } + } + + // -------------------------------------------------------------------------- + // STEP 2: + // + // Move all arguments to the stack which all already in registers. 
+ // -------------------------------------------------------------------------- + + for (i = 0; i < argumentsCount; i++) + { + if (processed[i]) continue; + + const FunctionPrototype::Argument& argType = targs[i]; + if (argType.registerIndex != INVALID_VALUE) continue; + + Operand& operand = _args[i]; + + if (operand.isVar()) + { + VarCallRecord* rec = _argumentToVarRecord[i]; + VarData* vdata = compiler->_getVarData(operand.getId()); + + if (vdata->registerIndex != INVALID_VALUE) + { + _moveAllocatedVariableToStack(cc, + vdata, argType); + + rec->inDone++; + processed[i] = true; + } + } + } + + // -------------------------------------------------------------------------- + // STEP 3: + // + // Spill all non-preserved variables we moved to stack in STEP #2. + // -------------------------------------------------------------------------- + + for (i = 0; i < argumentsCount; i++) + { + VarCallRecord* rec = _argumentToVarRecord[i]; + if (!rec || processed[i]) continue; + + if (rec->inDone >= rec->inCount) + { + VarData* vdata = rec->vdata; + if (vdata->registerIndex == INVALID_VALUE) continue; + + if (rec->outCount) + { + // Variable will be rewritten by function return value, it's not needed + // to spill it. It will be allocated again by ECall. 
+ cc.unuseVar(rec->vdata, VARIABLE_STATE_UNUSED); + } + else + { + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + case VARIABLE_TYPE_GPQ: + if ((getPrototype().getPreservedGP() & Util::maskFromIndex(vdata->registerIndex)) == 0) + cc.spillGPVar(vdata); + break; + case VARIABLE_TYPE_MM: + if ((getPrototype().getPreservedMM() & Util::maskFromIndex(vdata->registerIndex)) == 0) + cc.spillMMVar(vdata); + break; + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_2D: + if ((getPrototype().getPreservedXMM() & Util::maskFromIndex(vdata->registerIndex)) == 0) + cc.spillXMMVar(vdata); + break; + } + } + } + } + + // -------------------------------------------------------------------------- + // STEP 4: + // + // Get temporary register that we can use to pass input function arguments. + // Now it's safe to do, because the non-needed variables should be spilled. + // -------------------------------------------------------------------------- + + temporaryGpReg = _findTemporaryGpRegister(cc); + temporaryXmmReg = _findTemporaryXmmRegister(cc); + + // If failed to get temporary register then we need just to pick one. + if (temporaryGpReg == INVALID_VALUE) + { + // TODO. + } + if (temporaryXmmReg == INVALID_VALUE) + { + // TODO. + } + + // -------------------------------------------------------------------------- + // STEP 5: + // + // Move all remaining arguments to the stack (we can use temporary register). + // or allocate it to the primary register. Also move immediates. 
+ // -------------------------------------------------------------------------- + + for (i = 0; i < argumentsCount; i++) + { + if (processed[i]) continue; + + const FunctionPrototype::Argument& argType = targs[i]; + if (argType.registerIndex != INVALID_VALUE) continue; + + Operand& operand = _args[i]; + + if (operand.isVar()) + { + VarCallRecord* rec = _argumentToVarRecord[i]; + VarData* vdata = compiler->_getVarData(operand.getId()); + + _moveSpilledVariableToStack(cc, + vdata, argType, + temporaryGpReg, temporaryXmmReg); + + rec->inDone++; + processed[i] = true; + } + else if (operand.isImm()) + { + // TODO. + } + } + + // -------------------------------------------------------------------------- + // STEP 6: + // + // Allocate arguments to registers. + // -------------------------------------------------------------------------- + + bool didWork; + + do { + didWork = false; + + for (i = 0; i < argumentsCount; i++) + { + if (processed[i]) continue; + + VarCallRecord* rsrc = _argumentToVarRecord[i]; + + Operand& osrc = _args[i]; + ASMJIT_ASSERT(osrc.isVar()); + VarData* vsrc = compiler->_getVarData(osrc.getId()); + + const FunctionPrototype::Argument& srcArgType = targs[i]; + VarData* vdst = _getOverlappingVariable(cc, srcArgType); + + if (vsrc == vdst) + { + rsrc->inDone++; + processed[i] = true; + + didWork = true; + continue; + } + else if (vdst != NULL) + { + VarCallRecord* rdst = reinterpret_cast<VarCallRecord*>(vdst->tempPtr); + + if (rdst->inDone >= rdst->inCount && (rdst->flags & VarCallRecord::FLAG_CALL_OPERAND_REG) == 0) + { + // Safe to spill. 
+ if (rdst->outCount || vdst->lastEmittable == this) + cc.unuseVar(vdst, VARIABLE_STATE_UNUSED); + else + cc.spillVar(vdst); + vdst = NULL; + } + else + { + uint32_t x = getPrototype().findArgumentByRegisterCode( + getVariableRegisterCode(vsrc->type, vsrc->registerIndex)); + bool doSpill = true; + + if ((getVariableClass(vdst->type) & VariableInfo::CLASS_GP) != 0) + { + // Try to emit mov to register which is possible for call() operand. + if (x == INVALID_VALUE && (rdst->flags & VarCallRecord::FLAG_CALL_OPERAND_REG) != 0) + { + uint32_t rIndex; + uint32_t rBit; + + // The mask which contains registers which are not-preserved + // (these that might be clobbered by the callee) and which are + // not used to pass function arguments. Each register contained + // in this mask is ideal to be used by call() instruction. + uint32_t possibleMask = ~getPrototype().getPreservedGP() & + ~getPrototype().getPassedGP() & + Util::maskUpToIndex(REG_NUM_GP); + + if (possibleMask != 0) + { + for (rIndex = 0, rBit = 1; rIndex < REG_NUM_GP; rIndex++, rBit <<= 1) + { + if ((possibleMask & rBit) != 0) + { + if (cc._state.gp[rIndex] == NULL) + { + // This is the best possible solution, the register is + // free. We do not need to continue with this loop, the + // rIndex will be used by the call(). + break; + } + else + { + // Wait until the register is freed or try to find another. + doSpill = false; + didWork = true; + } + } + } + } + else + { + // Try to find a register which is free and which is not used + // to pass a function argument. + possibleMask = getPrototype().getPreservedGP(); + + for (rIndex = 0, rBit = 1; rIndex < REG_NUM_GP; rIndex++, rBit <<= 1) + { + if ((possibleMask & rBit) != 0) + { + // Found one. 
+ if (cc._state.gp[rIndex] == NULL) break; + } + } + } + + if (rIndex < REG_NUM_GP) + { + if (temporaryGpReg == vsrc->registerIndex) temporaryGpReg = rIndex; + compiler->emit(INST_MOV, gpn(rIndex), gpn(vsrc->registerIndex)); + + cc._state.gp[vsrc->registerIndex] = NULL; + cc._state.gp[rIndex] = vsrc; + + vsrc->registerIndex = rIndex; + cc._allocatedGPRegister(rIndex); + + doSpill = false; + didWork = true; + } + } + // Emit xchg instead of spill/alloc if possible. + else if (x != INVALID_VALUE) + { + const FunctionPrototype::Argument& dstArgType = targs[x]; + if (getVariableClass(dstArgType.variableType) == getVariableClass(srcArgType.variableType)) + { + uint32_t dstIndex = vdst->registerIndex; + uint32_t srcIndex = vsrc->registerIndex; + + if (srcIndex == dstArgType.registerIndex) + { +#if defined(ASMJIT_X64) + if (vdst->type != VARIABLE_TYPE_GPD || vsrc->type != VARIABLE_TYPE_GPD) + compiler->emit(INST_XCHG, gpq(dstIndex), gpq(srcIndex)); + else +#endif + compiler->emit(INST_XCHG, gpd(dstIndex), gpd(srcIndex)); + + cc._state.gp[srcIndex] = vdst; + cc._state.gp[dstIndex] = vsrc; + + vdst->registerIndex = srcIndex; + vsrc->registerIndex = dstIndex; + + rdst->inDone++; + rsrc->inDone++; + + processed[i] = true; + processed[x] = true; + + doSpill = false; + } + } + } + } + + if (doSpill) + { + cc.spillVar(vdst); + vdst = NULL; + } + } + } + + if (vdst == NULL) + { + VarCallRecord* rec = reinterpret_cast<VarCallRecord*>(vsrc->tempPtr); + + _moveSrcVariableToRegister(cc, vsrc, srcArgType); + + switch (srcArgType.variableType) + { + case VARIABLE_TYPE_GPD: + case VARIABLE_TYPE_GPQ: + cc._markGPRegisterModified(srcArgType.registerIndex); + break; + case VARIABLE_TYPE_MM: + cc._markMMRegisterModified(srcArgType.registerIndex); + break; + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_2D: + cc._markMMRegisterModified(srcArgType.registerIndex); + break; + } + + rec->inDone++; + 
processed[i] = true; + } + } + } while (didWork); + + // -------------------------------------------------------------------------- + // STEP 7: + // + // Allocate operand used by CALL instruction. + // -------------------------------------------------------------------------- + + for (i = 0; i < variablesCount; i++) + { + VarCallRecord& r = _variables[i]; + if ((r.flags & VarCallRecord::FLAG_CALL_OPERAND_REG) && + (r.vdata->registerIndex == INVALID_VALUE)) + { + // If the register is not allocated and the call form is 'call reg' then + // it's possible to keep it in memory. + if ((r.flags & VarCallRecord::FLAG_CALL_OPERAND_MEM) == 0) + { + _target = GPVarFromData(r.vdata).m(); + break; + } + + if (temporaryGpReg == INVALID_VALUE) + temporaryGpReg = _findTemporaryGpRegister(cc); + + cc.allocGPVar(r.vdata, + Util::maskFromIndex(temporaryGpReg), + VARIABLE_ALLOC_REGISTER | VARIABLE_ALLOC_READ); + } + } + + cc.translateOperands(&_target, 1); + + // -------------------------------------------------------------------------- + // STEP 8: + // + // Spill all preserved variables. 
+ // -------------------------------------------------------------------------- + + preserved = getPrototype().getPreservedGP(); + for (i = 0, mask = 1; i < REG_NUM_GP; i++, mask <<= 1) + { + VarData* vdata = cc._state.gp[i]; + if (vdata && (preserved & mask) == 0) + { + VarCallRecord* rec = reinterpret_cast<VarCallRecord*>(vdata->tempPtr); + if (rec && (rec->outCount || rec->flags & VarCallRecord::FLAG_UNUSE_AFTER_USE || vdata->lastEmittable == this)) + cc.unuseVar(vdata, VARIABLE_STATE_UNUSED); + else + cc.spillGPVar(vdata); + } + } + + preserved = getPrototype().getPreservedMM(); + for (i = 0, mask = 1; i < REG_NUM_MM; i++, mask <<= 1) + { + VarData* vdata = cc._state.mm[i]; + if (vdata && (preserved & mask) == 0) + { + VarCallRecord* rec = reinterpret_cast<VarCallRecord*>(vdata->tempPtr); + if (rec && (rec->outCount || vdata->lastEmittable == this)) + cc.unuseVar(vdata, VARIABLE_STATE_UNUSED); + else + cc.spillMMVar(vdata); + } + } + + preserved = getPrototype().getPreservedXMM(); + for (i = 0, mask = 1; i < REG_NUM_XMM; i++, mask <<= 1) + { + VarData* vdata = cc._state.xmm[i]; + if (vdata && (preserved & mask) == 0) + { + VarCallRecord* rec = reinterpret_cast<VarCallRecord*>(vdata->tempPtr); + if (rec && (rec->outCount || vdata->lastEmittable == this)) + cc.unuseVar(vdata, VARIABLE_STATE_UNUSED); + else + cc.spillXMMVar(vdata); + } + } + + // -------------------------------------------------------------------------- + // STEP 9: + // + // Emit CALL instruction. + // -------------------------------------------------------------------------- + + compiler->emit(INST_CALL, _target); + + // Restore the stack offset. + if (getPrototype().getCalleePopsStack()) + { + int32_t s = (int32_t)getPrototype().getArgumentsStackSize(); + if (s) compiler->emit(INST_SUB, nsp, imm(s)); + } + + // -------------------------------------------------------------------------- + // STEP 10: + // + // Prepare others for return value(s) and cleanup. 
+ // -------------------------------------------------------------------------- + + // Clear temp data, see AsmJit::VarData::temp why it's needed. + for (i = 0; i < variablesCount; i++) + { + VarCallRecord* rec = &_variables[i]; + VarData* vdata = rec->vdata; + + if (rec->flags & (VarCallRecord::FLAG_OUT_EAX | VarCallRecord::FLAG_OUT_EDX)) + { + if (getVariableClass(vdata->type) & VariableInfo::CLASS_GP) + { + cc.allocGPVar(vdata, + Util::maskFromIndex((rec->flags & VarCallRecord::FLAG_OUT_EAX) != 0 + ? REG_INDEX_EAX + : REG_INDEX_EDX), + VARIABLE_ALLOC_REGISTER | VARIABLE_ALLOC_WRITE); + vdata->changed = true; + } + } + + if (rec->flags & (VarCallRecord::FLAG_OUT_MM0)) + { + if (getVariableClass(vdata->type) & VariableInfo::CLASS_MM) + { + cc.allocMMVar(vdata, Util::maskFromIndex(REG_INDEX_MM0), + VARIABLE_ALLOC_REGISTER | VARIABLE_ALLOC_WRITE); + vdata->changed = true; + } + } + + if (rec->flags & (VarCallRecord::FLAG_OUT_XMM0 | VarCallRecord::FLAG_OUT_XMM1)) + { + if (getVariableClass(vdata->type) & VariableInfo::CLASS_XMM) + { + cc.allocXMMVar(vdata, + Util::maskFromIndex((rec->flags & VarCallRecord::FLAG_OUT_XMM0) != 0 + ? REG_INDEX_XMM0 + : REG_INDEX_XMM1), + VARIABLE_ALLOC_REGISTER | VARIABLE_ALLOC_WRITE); + vdata->changed = true; + } + } + + if (rec->flags & (VarCallRecord::FLAG_OUT_ST0 | VarCallRecord::FLAG_OUT_ST1)) + { + if (getVariableClass(vdata->type) & VariableInfo::CLASS_XMM) + { + Mem mem(cc._getVarMem(vdata)); + cc.unuseVar(vdata, VARIABLE_STATE_MEMORY); + + switch (vdata->type) + { + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + { + mem.setSize(4); + compiler->emit(INST_FSTP, mem); + break; + } + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + { + mem.setSize(8); + compiler->emit(INST_FSTP, mem); + break; + } + default: + { + compiler->comment("*** WARNING: Can't convert float return value to untyped XMM\n"); + break; + } + } + } + } + + // Cleanup. 
+ vdata->tempPtr = NULL; + } + + for (i = 0; i < variablesCount; i++) + { + cc._unuseVarOnEndOfScope(this, &_variables[i]); + } + + return translated(); +} + +int ECall::getMaxSize() const ASMJIT_NOTHROW +{ + // TODO: Not optimal. + return 15; +} + +bool ECall::_tryUnuseVar(VarData* v) ASMJIT_NOTHROW +{ + for (uint32_t i = 0; i < _variablesCount; i++) + { + if (_variables[i].vdata == v) + { + _variables[i].flags |= VarCallRecord::FLAG_UNUSE_AFTER_USE; + return true; + } + } + + return false; +} + +uint32_t ECall::_findTemporaryGpRegister(CompilerContext& cc) ASMJIT_NOTHROW +{ + uint32_t i; + uint32_t mask; + + uint32_t passedGP = getPrototype().getPassedGP(); + uint32_t candidate = INVALID_VALUE; + + // Find all registers used to pass function arguments. We shouldn't use these + // if possible. + for (i = 0, mask = 1; i < REG_NUM_GP; i++, mask <<= 1) + { + if (cc._state.gp[i] == NULL) + { + // If this register is used to pass arguments to function, we will mark + // it and use it only if there is no other one. + if ((passedGP & mask) != 0) + candidate = i; + else + return i; + } + } + + return candidate; +} + +uint32_t ECall::_findTemporaryXmmRegister(CompilerContext& cc) ASMJIT_NOTHROW +{ + uint32_t i; + uint32_t mask; + + uint32_t passedXMM = getPrototype().getPassedXMM(); + uint32_t candidate = INVALID_VALUE; + + // Find all registers used to pass function arguments. We shouldn't use these + // if possible. + for (i = 0, mask = 1; i < REG_NUM_XMM; i++, mask <<= 1) + { + if (cc._state.xmm[i] == NULL) + { + // If this register is used to pass arguments to function, we will mark + // it and use it only if there is no other one. 
+ if ((passedXMM & mask) != 0) + candidate = i; + else + return i; + } + } + + return candidate; +} + +VarData* ECall::_getOverlappingVariable(CompilerContext& cc, + const FunctionPrototype::Argument& argType) const ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(argType.variableType != INVALID_VALUE); + + switch (argType.variableType) + { + case VARIABLE_TYPE_GPD: + case VARIABLE_TYPE_GPQ: + return cc._state.gp[argType.registerIndex]; + case VARIABLE_TYPE_MM: + return cc._state.mm[argType.registerIndex]; + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_2D: + return cc._state.xmm[argType.registerIndex]; + } + + return NULL; +} + +void ECall::_moveAllocatedVariableToStack(CompilerContext& cc, VarData* vdata, const FunctionPrototype::Argument& argType) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(argType.registerIndex == INVALID_VALUE); + ASMJIT_ASSERT(vdata->registerIndex != INVALID_VALUE); + + Compiler* compiler = cc.getCompiler(); + + uint32_t src = vdata->registerIndex; + Mem dst = ptr(nsp, -(int)sizeof(sysint_t) + argType.stackOffset); + + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + switch (argType.variableType) + { + case VARIABLE_TYPE_GPD: + compiler->emit(INST_MOV, dst, gpd(src)); + return; +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOV, dst, gpq(src)); + return; +#endif // ASMJIT_X64 + } + break; + +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + switch (argType.variableType) + { + case VARIABLE_TYPE_GPD: + compiler->emit(INST_MOV, dst, gpd(src)); + return; + case VARIABLE_TYPE_GPQ: + compiler->emit(INST_MOV, dst, gpq(src)); + return; + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVQ, dst, gpq(src)); + return; + } + break; +#endif // ASMJIT_X64 + + case VARIABLE_TYPE_MM: + switch (argType.variableType) + { + case VARIABLE_TYPE_GPD: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_XMM_1F: + compiler->emit(INST_MOVD, dst, 
mm(src)); + return; + case VARIABLE_TYPE_GPQ: + case VARIABLE_TYPE_MM: + case VARIABLE_TYPE_X87_1D: + case VARIABLE_TYPE_XMM_1D: + compiler->emit(INST_MOVQ, dst, mm(src)); + return; + } + break; + + // We allow incompatible types here, because the called can convert them + // to correct format before function is called. + + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_2D: + switch (argType.variableType) + { + case VARIABLE_TYPE_XMM: + compiler->emit(INST_MOVDQU, dst, xmm(src)); + return; + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + compiler->emit(INST_MOVUPS, dst, xmm(src)); + return; + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_MOVUPD, dst, xmm(src)); + return; + } + break; + + case VARIABLE_TYPE_XMM_1F: + switch (argType.variableType) + { + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_MOVSS, dst, xmm(src)); + return; + } + break; + + case VARIABLE_TYPE_XMM_1D: + switch (argType.variableType) + { + case VARIABLE_TYPE_X87_1D: + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_MOVSD, dst, xmm(src)); + return; + } + break; + } + + compiler->setError(ERROR_INCOMPATIBLE_ARGUMENT); +} + +void ECall::_moveSpilledVariableToStack(CompilerContext& cc, + VarData* vdata, const FunctionPrototype::Argument& argType, + uint32_t temporaryGpReg, + uint32_t temporaryXmmReg) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(argType.registerIndex == INVALID_VALUE); + ASMJIT_ASSERT(vdata->registerIndex == INVALID_VALUE); + + Compiler* compiler = cc.getCompiler(); + + Mem src = cc._getVarMem(vdata); + Mem dst = ptr(nsp, -(int)sizeof(sysint_t) + argType.stackOffset); + + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + switch (argType.variableType) + { + case 
VARIABLE_TYPE_GPD: + compiler->emit(INST_MOV, gpd(temporaryGpReg), src); + compiler->emit(INST_MOV, dst, gpd(temporaryGpReg)); + return; +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOV, gpd(temporaryGpReg), src); + compiler->emit(INST_MOV, dst, gpq(temporaryGpReg)); + return; +#endif // ASMJIT_X64 + } + break; + +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + switch (argType.variableType) + { + case VARIABLE_TYPE_GPD: + compiler->emit(INST_MOV, gpd(temporaryGpReg), src); + compiler->emit(INST_MOV, dst, gpd(temporaryGpReg)); + return; + case VARIABLE_TYPE_GPQ: + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOV, gpq(temporaryGpReg), src); + compiler->emit(INST_MOV, dst, gpq(temporaryGpReg)); + return; + } + break; +#endif // ASMJIT_X64 + + case VARIABLE_TYPE_MM: + switch (argType.variableType) + { + case VARIABLE_TYPE_GPD: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_XMM_1F: + compiler->emit(INST_MOV, gpd(temporaryGpReg), src); + compiler->emit(INST_MOV, dst, gpd(temporaryGpReg)); + return; + case VARIABLE_TYPE_GPQ: + case VARIABLE_TYPE_MM: + case VARIABLE_TYPE_X87_1D: + case VARIABLE_TYPE_XMM_1D: + // TODO + return; + } + break; + + // We allow incompatible types here, because the called can convert them + // to correct format before function is called. 
+ + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_2D: + switch (argType.variableType) + { + case VARIABLE_TYPE_XMM: + compiler->emit(INST_MOVDQU, xmm(temporaryXmmReg), src); + compiler->emit(INST_MOVDQU, dst, xmm(temporaryXmmReg)); + return; + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + compiler->emit(INST_MOVUPS, xmm(temporaryXmmReg), src); + compiler->emit(INST_MOVUPS, dst, xmm(temporaryXmmReg)); + return; + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_MOVUPD, xmm(temporaryXmmReg), src); + compiler->emit(INST_MOVUPD, dst, xmm(temporaryXmmReg)); + return; + } + break; + + case VARIABLE_TYPE_XMM_1F: + switch (argType.variableType) + { + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_MOVSS, xmm(temporaryXmmReg), src); + compiler->emit(INST_MOVSS, dst, xmm(temporaryXmmReg)); + return; + } + break; + + case VARIABLE_TYPE_XMM_1D: + switch (argType.variableType) + { + case VARIABLE_TYPE_X87_1D: + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_MOVSD, xmm(temporaryXmmReg), src); + compiler->emit(INST_MOVSD, dst, xmm(temporaryXmmReg)); + return; + } + break; + } + + compiler->setError(ERROR_INCOMPATIBLE_ARGUMENT); +} + +void ECall::_moveSrcVariableToRegister(CompilerContext& cc, + VarData* vdata, const FunctionPrototype::Argument& argType) ASMJIT_NOTHROW +{ + uint32_t dst = argType.registerIndex; + uint32_t src = vdata->registerIndex; + + Compiler* compiler = cc.getCompiler(); + + if (src != INVALID_VALUE) + { + switch (argType.variableType) + { + case VARIABLE_TYPE_GPD: + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: +#endif // ASMJIT_X64 + compiler->emit(INST_MOV, gpd(dst), gpd(src)); 
+ return; + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVD, gpd(dst), mm(src)); + return; + } + break; + +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + compiler->emit(INST_MOV, gpd(dst), gpd(src)); + return; + case VARIABLE_TYPE_GPQ: + compiler->emit(INST_MOV, gpq(dst), gpq(src)); + return; + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVQ, gpq(dst), mm(src)); + return; + } + break; +#endif // ASMJIT_X64 + + case VARIABLE_TYPE_MM: + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + compiler->emit(INST_MOVD, gpd(dst), gpd(src)); + return; +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + compiler->emit(INST_MOVQ, gpq(dst), gpq(src)); + return; +#endif // ASMJIT_X64 + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVQ, mm(dst), mm(src)); + return; + } + break; + + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_2D: + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + compiler->emit(INST_MOVD, xmm(dst), gpd(src)); + return; +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + compiler->emit(INST_MOVQ, xmm(dst), gpq(src)); + return; +#endif // ASMJIT_X64 + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVQ, xmm(dst), mm(src)); + return; + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_MOVDQA, xmm(dst), xmm(src)); + return; + } + break; + + case VARIABLE_TYPE_XMM_1F: + switch (vdata->type) + { + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVQ, xmm(dst), mm(src)); + return; + + case VARIABLE_TYPE_XMM: + compiler->emit(INST_MOVDQA, xmm(dst), xmm(src)); + return; + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + compiler->emit(INST_MOVSS, xmm(dst), xmm(src)); + return; + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_CVTSD2SS, xmm(dst), xmm(src)); + return; + } + break; + + case VARIABLE_TYPE_XMM_1D: + switch 
(vdata->type) + { + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVQ, xmm(dst), mm(src)); + return; + + case VARIABLE_TYPE_XMM: + compiler->emit(INST_MOVDQA, xmm(dst), xmm(src)); + return; + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + compiler->emit(INST_CVTSS2SD, xmm(dst), xmm(src)); + return; + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_MOVSD, xmm(dst), xmm(src)); + return; + } + break; + } + } + else + { + Mem mem = cc._getVarMem(vdata); + + switch (argType.variableType) + { + case VARIABLE_TYPE_GPD: + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: +#endif // ASMJIT_X64 + compiler->emit(INST_MOV, gpd(dst), mem); + return; + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVD, gpd(dst), mem); + return; + } + break; + +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + compiler->emit(INST_MOV, gpd(dst), mem); + return; + case VARIABLE_TYPE_GPQ: + compiler->emit(INST_MOV, gpq(dst), mem); + return; + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVQ, gpq(dst), mem); + return; + } + break; +#endif // ASMJIT_X64 + + case VARIABLE_TYPE_MM: + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + compiler->emit(INST_MOVD, gpd(dst), mem); + return; +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + compiler->emit(INST_MOVQ, gpq(dst), mem); + return; +#endif // ASMJIT_X64 + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVQ, mm(dst), mem); + return; + } + break; + + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_2D: + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + compiler->emit(INST_MOVD, xmm(dst), mem); + return; +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + compiler->emit(INST_MOVQ, xmm(dst), mem); + return; +#endif // ASMJIT_X64 + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVQ, xmm(dst), mem); + return; + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case 
VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_MOVDQA, xmm(dst), mem); + return; + } + break; + + case VARIABLE_TYPE_XMM_1F: + switch (vdata->type) + { + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVQ, xmm(dst), mem); + return; + + case VARIABLE_TYPE_XMM: + compiler->emit(INST_MOVDQA, xmm(dst), mem); + return; + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + compiler->emit(INST_MOVSS, xmm(dst), mem); + return; + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_CVTSD2SS, xmm(dst), mem); + return; + } + break; + + case VARIABLE_TYPE_XMM_1D: + switch (vdata->type) + { + case VARIABLE_TYPE_MM: + compiler->emit(INST_MOVQ, xmm(dst), mem); + return; + + case VARIABLE_TYPE_XMM: + compiler->emit(INST_MOVDQA, xmm(dst), mem); + return; + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + compiler->emit(INST_CVTSS2SD, xmm(dst), mem); + return; + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_MOVSD, xmm(dst), mem); + return; + } + break; + } + } + + compiler->setError(ERROR_INCOMPATIBLE_ARGUMENT); +} + +// Prototype & Arguments Management. 
+void ECall::_setPrototype( + uint32_t callingConvention, + const uint32_t* arguments, + uint32_t argumentsCount, + uint32_t returnValue) ASMJIT_NOTHROW +{ + _functionPrototype.setPrototype(callingConvention, arguments, argumentsCount, returnValue); + + _args = reinterpret_cast<Operand*>( + getCompiler()->getZone().zalloc(sizeof(Operand) * argumentsCount)); + memset(_args, 0, sizeof(Operand) * argumentsCount); +} + +bool ECall::setArgument(uint32_t i, const BaseVar& var) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(i < _functionPrototype.getArgumentsCount()); + if (i >= _functionPrototype.getArgumentsCount()) return false; + + _args[i] = var; + return true; +} + +bool ECall::setArgument(uint32_t i, const Imm& imm) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(i < _functionPrototype.getArgumentsCount()); + if (i >= _functionPrototype.getArgumentsCount()) return false; + + _args[i] = imm; + return true; +} + +bool ECall::setReturn(const Operand& first, const Operand& second) ASMJIT_NOTHROW +{ + _ret[0] = first; + _ret[1] = second; + + return true; +} + +// ============================================================================ +// [AsmJit::ERet] +// ============================================================================ + +ERet::ERet(Compiler* c, EFunction* function, const Operand* first, const Operand* second) ASMJIT_NOTHROW : + Emittable(c, EMITTABLE_RET), + _function(function) +{ + if (first ) _ret[0] = *first; + if (second) _ret[1] = *second; + +/* + // TODO:? + + // Check whether the return value is compatible. 
+ uint32_t retValType = function->getPrototype().getReturnValue(); + bool valid = false; + + switch (retValType) + { + case VARIABLE_TYPE_GPD: + case VARIABLE_TYPE_GPQ: + if ((_ret[0].isVar() && (reinterpret_cast<const BaseVar&>(_ret[0]).isGPVar())) || + (_ret[0].isImm())) + { + valid = true; + } + break; + + case VARIABLE_TYPE_X87: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_X87_1D: + if ((_ret[0].isVar() && (reinterpret_cast<const BaseVar&>(_ret[0]).isX87Var() || + reinterpret_cast<const BaseVar&>(_ret[0]).isXMMVar() )) ) + { + valid = true; + } + break; + + case VARIABLE_TYPE_MM: + break; + + case INVALID_VALUE: + if (_ret[0].isNone() && + _ret[1].isNone()) + { + valid = true; + } + break; + + default: + break; + } + + // Incompatible return value. + if (!valid) + { + c->setError(ERROR_INCOMPATIBLE_RETURN_VALUE); + } +*/ +} + +ERet::~ERet() ASMJIT_NOTHROW +{ +} + +void ERet::prepare(CompilerContext& cc) ASMJIT_NOTHROW +{ + _offset = cc._currentOffset; + + uint32_t retValType = getFunction()->getPrototype().getReturnValue(); + if (retValType != INVALID_VALUE) + { + uint32_t i; + for (i = 0; i < 2; i++) + { + Operand& o = _ret[i]; + + if (o.isVar()) + { + ASMJIT_ASSERT(o.getId() != INVALID_VALUE); + VarData* vdata = _compiler->_getVarData(o.getId()); + ASMJIT_ASSERT(vdata != NULL); + + // First emittable (begin of variable scope). + if (vdata->firstEmittable == NULL) vdata->firstEmittable = this; + + // Last emittable (end of variable scope). + vdata->lastEmittable = this; + + if (vdata->workOffset == _offset) continue; + if (!cc._isActive(vdata)) cc._addActive(vdata); + + vdata->workOffset = _offset; + vdata->registerReadCount++; + + if (isVariableInteger(vdata->type) && isVariableInteger(retValType)) + { + cc._newRegisterHomeIndex(vdata, (i == 0) ? 
REG_INDEX_EAX : REG_INDEX_EDX); + } + } + } + } + + cc._currentOffset++; +} + +Emittable* ERet::translate(CompilerContext& cc) ASMJIT_NOTHROW +{ + Compiler* compiler = cc.getCompiler(); + + // Check whether the return value is compatible. + uint32_t retValType = getFunction()->getPrototype().getReturnValue(); + uint32_t i; + + switch (retValType) + { + case VARIABLE_TYPE_GPD: + case VARIABLE_TYPE_GPQ: + for (i = 0; i < 2; i++) + { + uint32_t dsti = (i == 0) ? REG_INDEX_EAX : REG_INDEX_EDX; + uint32_t srci; + + if (_ret[i].isVar()) + { + if (reinterpret_cast<const BaseVar&>(_ret[i]).isGPVar()) + { + VarData* vdata = compiler->_getVarData(_ret[i].getId()); + ASMJIT_ASSERT(vdata != NULL); + + srci = vdata->registerIndex; + if (srci == INVALID_VALUE) + compiler->emit(INST_MOV, gpn(dsti), cc._getVarMem(vdata)); + else if (dsti != srci) + compiler->emit(INST_MOV, gpn(dsti), gpn(srci)); + } + } + else if (_ret[i].isImm()) + { + compiler->emit(INST_MOV, gpn(dsti), _ret[i]); + } + } + break; + + case VARIABLE_TYPE_X87: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_X87_1D: + // There is case that we need to return two values (Unix-ABI specific): + // - FLD #2 + //- FLD #1 + i = 2; + do { + i--; + uint32_t dsti = i; + uint32_t srci; + + if (_ret[i].isVar()) + { + if (reinterpret_cast<const BaseVar&>(_ret[i]).isX87Var()) + { + // TODO: X87. 
+ } + else if (reinterpret_cast<const BaseVar&>(_ret[i]).isXMMVar()) + { + VarData* vdata = compiler->_getVarData(_ret[i].getId()); + ASMJIT_ASSERT(vdata != NULL); + + srci = vdata->registerIndex; + if (srci != INVALID_VALUE) cc.saveXMMVar(vdata); + + switch (vdata->type) + { + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + compiler->emit(INST_FLD, _baseVarMem(reinterpret_cast<BaseVar&>(_ret[i]), 4)); + break; + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + compiler->emit(INST_FLD, _baseVarMem(reinterpret_cast<BaseVar&>(_ret[i]), 8)); + break; + } + } + } + } while (i != 0); + break; + + case VARIABLE_TYPE_MM: + for (i = 0; i < 2; i++) + { + uint32_t dsti = i; + uint32_t srci; + + if (_ret[i].isVar()) + { + if (reinterpret_cast<const BaseVar&>(_ret[i]).isGPVar()) + { + VarData* vdata = compiler->_getVarData(_ret[i].getId()); + ASMJIT_ASSERT(vdata != NULL); + + srci = vdata->registerIndex; + uint32_t inst = _ret[i].isRegType(REG_TYPE_GPQ) ? INST_MOVQ : INST_MOVD; + + if (srci == INVALID_VALUE) + compiler->emit(inst, mm(dsti), cc._getVarMem(vdata)); + else +#if defined(ASMJIT_X86) + compiler->emit(inst, mm(dsti), gpd(srci)); +#else + compiler->emit(inst, mm(dsti), _ret[i].isRegType(REG_TYPE_GPQ) ? 
gpq(srci) : gpd(srci)); +#endif + } + else if (reinterpret_cast<const BaseVar&>(_ret[i]).isMMVar()) + { + VarData* vdata = compiler->_getVarData(_ret[i].getId()); + ASMJIT_ASSERT(vdata != NULL); + + srci = vdata->registerIndex; + uint32_t inst = INST_MOVQ; + + if (srci == INVALID_VALUE) + compiler->emit(inst, mm(dsti), cc._getVarMem(vdata)); + else if (dsti != srci) + compiler->emit(inst, mm(dsti), mm(srci)); + } + else if (reinterpret_cast<const BaseVar&>(_ret[i]).isXMMVar()) + { + VarData* vdata = compiler->_getVarData(_ret[i].getId()); + ASMJIT_ASSERT(vdata != NULL); + + srci = vdata->registerIndex; + uint32_t inst = INST_MOVQ; + if (reinterpret_cast<const BaseVar&>(_ret[i]).getVariableType() == VARIABLE_TYPE_XMM_1F) inst = INST_MOVD; + + if (srci == INVALID_VALUE) + compiler->emit(inst, mm(dsti), cc._getVarMem(vdata)); + else + compiler->emit(inst, mm(dsti), xmm(srci)); + } + } + } + break; + + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_2D: + for (i = 0; i < 2; i++) + { + uint32_t dsti = i; + uint32_t srci; + + if (_ret[i].isVar()) + { + if (reinterpret_cast<const BaseVar&>(_ret[i]).isGPVar()) + { + VarData* vdata = compiler->_getVarData(_ret[i].getId()); + ASMJIT_ASSERT(vdata != NULL); + + srci = vdata->registerIndex; + uint32_t inst = _ret[i].isRegType(REG_TYPE_GPQ) ? INST_MOVQ : INST_MOVD; + + if (srci == INVALID_VALUE) + compiler->emit(inst, xmm(dsti), cc._getVarMem(vdata)); + else +#if defined(ASMJIT_X86) + compiler->emit(inst, xmm(dsti), gpd(srci)); +#else + compiler->emit(inst, xmm(dsti), _ret[i].isRegType(REG_TYPE_GPQ) ? gpq(srci) : gpd(srci)); +#endif + } + else if (reinterpret_cast<const BaseVar&>(_ret[i]).isX87Var()) + { + // TODO: X87. 
+ } + else if (reinterpret_cast<const BaseVar&>(_ret[i]).isMMVar()) + { + VarData* vdata = compiler->_getVarData(_ret[i].getId()); + ASMJIT_ASSERT(vdata != NULL); + + srci = vdata->registerIndex; + if (srci == INVALID_VALUE) + compiler->emit(INST_MOVQ, xmm(dsti), cc._getVarMem(vdata)); + else + compiler->emit(INST_MOVQ, xmm(dsti), mm(srci)); + } + else if (reinterpret_cast<const BaseVar&>(_ret[i]).isXMMVar()) + { + VarData* vdata = compiler->_getVarData(_ret[i].getId()); + ASMJIT_ASSERT(vdata != NULL); + + srci = vdata->registerIndex; + if (srci == INVALID_VALUE) + compiler->emit(INST_MOVDQA, xmm(dsti), cc._getVarMem(vdata)); + else if (dsti != srci) + compiler->emit(INST_MOVDQA, xmm(dsti), xmm(srci)); + } + } + } + break; + + case VARIABLE_TYPE_XMM_1F: + for (i = 0; i < 2; i++) + { + uint32_t dsti = i; + uint32_t srci; + + if (_ret[i].isVar()) + { + if (reinterpret_cast<const BaseVar&>(_ret[i]).isX87Var()) + { + // TODO: X87. + } + else if (reinterpret_cast<const BaseVar&>(_ret[i]).isXMMVar()) + { + VarData* vdata = compiler->_getVarData(_ret[i].getId()); + ASMJIT_ASSERT(vdata != NULL); + + srci = vdata->registerIndex; + switch (vdata->type) + { + case VARIABLE_TYPE_XMM: + if (srci == INVALID_VALUE) + compiler->emit(INST_MOVDQA, xmm(dsti), cc._getVarMem(vdata)); + else if (dsti != srci) + compiler->emit(INST_MOVDQA, xmm(dsti), xmm(srci)); + break; + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + if (srci == INVALID_VALUE) + compiler->emit(INST_MOVSS, xmm(dsti), cc._getVarMem(vdata)); + else + compiler->emit(INST_MOVSS, xmm(dsti), xmm(srci)); + break; + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + if (srci == INVALID_VALUE) + compiler->emit(INST_CVTSD2SS, xmm(dsti), cc._getVarMem(vdata)); + else if (dsti != srci) + compiler->emit(INST_CVTSD2SS, xmm(dsti), xmm(srci)); + break; + } + } + } + } + break; + + case VARIABLE_TYPE_XMM_1D: + for (i = 0; i < 2; i++) + { + uint32_t dsti = i; + uint32_t srci; + + if (_ret[i].isVar()) + { + if 
(reinterpret_cast<const BaseVar&>(_ret[i]).isX87Var()) + { + // TODO: X87. + } + else if (reinterpret_cast<const BaseVar&>(_ret[i]).isXMMVar()) + { + VarData* vdata = compiler->_getVarData(_ret[i].getId()); + ASMJIT_ASSERT(vdata != NULL); + + srci = vdata->registerIndex; + switch (vdata->type) + { + case VARIABLE_TYPE_XMM: + if (srci == INVALID_VALUE) + compiler->emit(INST_MOVDQA, xmm(dsti), cc._getVarMem(vdata)); + else if (dsti != srci) + compiler->emit(INST_MOVDQA, xmm(dsti), xmm(srci)); + break; + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + if (srci == INVALID_VALUE) + compiler->emit(INST_CVTSS2SD, xmm(dsti), cc._getVarMem(vdata)); + else + compiler->emit(INST_CVTSS2SD, xmm(dsti), xmm(srci)); + break; + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + if (srci == INVALID_VALUE) + compiler->emit(INST_MOVSD, xmm(dsti), cc._getVarMem(vdata)); + else + compiler->emit(INST_MOVSD, xmm(dsti), xmm(srci)); + break; + } + } + } + } + break; + + case INVALID_VALUE: + default: + break; + } + + if (shouldEmitJumpToEpilog()) + { + cc._unrecheable = 1; + } + + for (i = 0; i < 2; i++) + { + if (_ret[i].isVar()) + { + VarData* vdata = compiler->_getVarData(_ret[i].getId()); + cc._unuseVarOnEndOfScope(this, vdata); + } + } + + return translated(); +} + +void ERet::emit(Assembler& a) ASMJIT_NOTHROW +{ + if (shouldEmitJumpToEpilog()) + { + a.jmp(getFunction()->getExitLabel()); + } +} + +int ERet::getMaxSize() const ASMJIT_NOTHROW +{ + return shouldEmitJumpToEpilog() ? 15 : 0; +} + +bool ERet::shouldEmitJumpToEpilog() const ASMJIT_NOTHROW +{ + // Iterate over next emittables. If we found emittable that emits real + // instruction then we must return @c true. + Emittable* e = this->getNext(); + + while (e) + { + switch (e->getType()) + { + // Non-interesting emittables. 
+ case EMITTABLE_COMMENT: + case EMITTABLE_DUMMY: + case EMITTABLE_ALIGN: + case EMITTABLE_BLOCK: + case EMITTABLE_VARIABLE_HINT: + case EMITTABLE_TARGET: + break; + + // Interesting emittables. + case EMITTABLE_EMBEDDED_DATA: + case EMITTABLE_INSTRUCTION: + case EMITTABLE_JUMP_TABLE: + case EMITTABLE_CALL: + case EMITTABLE_RET: + return true; + + // These emittables shouldn't be here. We are inside function, after + // prolog. + case EMITTABLE_FUNCTION: + case EMITTABLE_PROLOG: + break; + + // Stop station, we can't go forward from here. + case EMITTABLE_EPILOG: + return false; + } + e = e->getNext(); + } + + return false; +} + +// ============================================================================ +// [AsmJit::CompilerContext - Construction / Destruction] +// ============================================================================ + +CompilerContext::CompilerContext(Compiler* compiler) ASMJIT_NOTHROW : + _zone(8192 - sizeof(Zone::Chunk) - 32) +{ + _compiler = compiler; + _clear(); + + _emitComments = compiler->getLogger() != NULL; +} + +CompilerContext::~CompilerContext() ASMJIT_NOTHROW +{ +} + +// ============================================================================ +// [AsmJit::CompilerContext - Clear] +// ============================================================================ + +void CompilerContext::_clear() ASMJIT_NOTHROW +{ + _zone.clear(); + _function = NULL; + + _start = NULL; + _stop = NULL; + + _state.clear(); + _active = NULL; + + _forwardJumps = NULL; + + _currentOffset = 0; + _unrecheable = 0; + + _modifiedGPRegisters = 0; + _modifiedMMRegisters = 0; + _modifiedXMMRegisters = 0; + + _allocableEBP = false; + + _adjustESP = 0; + + _argumentsBaseReg = INVALID_VALUE; // Used by patcher. + _argumentsBaseOffset = 0; // Used by patcher. + _argumentsActualDisp = 0; // Used by translate(). + + _variablesBaseReg = INVALID_VALUE; // Used by patcher. + _variablesBaseOffset = 0; // Used by patcher. 
+ _variablesActualDisp = 0; // Used by translate() + + _memUsed = NULL; + _memFree = NULL; + + _mem4BlocksCount = 0; + _mem8BlocksCount = 0; + _mem16BlocksCount = 0; + + _memBytesTotal = 0; + + _backCode.clear(); + _backPos = 0; +} + +// ============================================================================ +// [AsmJit::CompilerContext - Construction / Destruction] +// ============================================================================ + +void CompilerContext::allocVar(VarData* vdata, uint32_t regMask, uint32_t vflags) ASMJIT_NOTHROW +{ + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: +#endif // ASMJIT_X64 + allocGPVar(vdata, regMask, vflags); + break; + + case VARIABLE_TYPE_X87: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_X87_1D: + // TODO: X87 VARIABLES NOT IMPLEMENTED. + break; + + case VARIABLE_TYPE_MM: + allocMMVar(vdata, regMask, vflags); + break; + + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + allocXMMVar(vdata, regMask, vflags); + break; + } + + _postAlloc(vdata, vflags); +} + +void CompilerContext::saveVar(VarData* vdata) ASMJIT_NOTHROW +{ + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: +#endif // ASMJIT_X64 + saveGPVar(vdata); + break; + + case VARIABLE_TYPE_X87: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_X87_1D: + // TODO: X87 VARIABLES NOT IMPLEMENTED. 
+ break; + + case VARIABLE_TYPE_MM: + saveMMVar(vdata); + break; + + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + saveXMMVar(vdata); + break; + } +} + +void CompilerContext::spillVar(VarData* vdata) ASMJIT_NOTHROW +{ + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: +#endif // ASMJIT_X64 + spillGPVar(vdata); + break; + + case VARIABLE_TYPE_X87: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_X87_1D: + // TODO: X87 VARIABLES NOT IMPLEMENTED. + break; + + case VARIABLE_TYPE_MM: + spillMMVar(vdata); + break; + + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + spillXMMVar(vdata); + break; + } +} + +void CompilerContext::unuseVar(VarData* vdata, uint32_t toState) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(toState != VARIABLE_STATE_REGISTER); + + if (vdata->state == VARIABLE_STATE_REGISTER) + { + uint32_t registerIndex = vdata->registerIndex; + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: +#endif // ASMJIT_X64 + _state.gp[registerIndex] = NULL; + _freedGPRegister(registerIndex); + break; + + case VARIABLE_TYPE_X87: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_X87_1D: + // TODO: X87 VARIABLES NOT IMPLEMENTED. 
+ break; + + case VARIABLE_TYPE_MM: + _state.mm[registerIndex] = NULL; + _freedMMRegister(registerIndex); + break; + + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + _state.xmm[registerIndex] = NULL; + _freedXMMRegister(registerIndex); + break; + } + } + + vdata->state = toState; + vdata->changed = false; + vdata->registerIndex = INVALID_VALUE; +} + +void CompilerContext::allocGPVar(VarData* vdata, uint32_t regMask, uint32_t vflags) ASMJIT_NOTHROW +{ + // Fix the regMask (0 or full bit-array means that any register may be used). + if (regMask == 0) regMask = Util::maskUpToIndex(REG_NUM_GP); + regMask &= Util::maskUpToIndex(REG_NUM_GP); + + // Working variables. + uint32_t i; + uint32_t mask; + + // Last register code (aka home). + uint32_t home = vdata->homeRegisterIndex; + // New register code. + uint32_t idx = INVALID_VALUE; + + // Preserved GP variables. + uint32_t preservedGP = vdata->scope->getPrototype().getPreservedGP(); + + // Spill candidate. + VarData* spillCandidate = NULL; + + // Whether to alloc the non-preserved variables first. + bool nonPreservedFirst = true; + if (getFunction()->_isCaller) + { + nonPreservedFirst = vdata->firstCallable == NULL || + vdata->firstCallable->getOffset() >= vdata->lastEmittable->getOffset(); + } + + // -------------------------------------------------------------------------- + // [Already Allocated] + // -------------------------------------------------------------------------- + + // Go away if variable is already allocated. + if (vdata->state == VARIABLE_STATE_REGISTER) + { + uint32_t oldIndex = vdata->registerIndex; + + // Already allocated in the right register. + if (Util::maskFromIndex(oldIndex) & regMask) return; + + // Try to find unallocated register first. + mask = regMask & ~_state.usedGP; + if (mask != 0) + { + idx = Util::findFirstBit( + (nonPreservedFirst && (mask & ~preservedGP) != 0) ? 
mask & ~preservedGP : mask); + } + // Then find the allocated and later exchange. + else + { + idx = Util::findFirstBit(regMask & _state.usedGP); + } + ASMJIT_ASSERT(idx != INVALID_VALUE); + + VarData* other = _state.gp[idx]; + emitExchangeVar(vdata, idx, vflags, other); + + _state.gp[oldIndex] = other; + _state.gp[idx ] = vdata; + + if (other) + other->registerIndex = oldIndex; + else + _freedGPRegister(oldIndex); + + // Update VarData. + vdata->state = VARIABLE_STATE_REGISTER; + vdata->registerIndex = idx; + vdata->homeRegisterIndex = idx; + + _allocatedGPRegister(idx); + return; + } + + // -------------------------------------------------------------------------- + // [Find Unused GP] + // -------------------------------------------------------------------------- + + // If regMask contains restricted registers which may be used then everything + // is handled in this block. + if (regMask != Util::maskUpToIndex(REG_NUM_GP)) + { + // Try to find unallocated register first. + mask = regMask & ~_state.usedGP; + if (mask != 0) + { + idx = Util::findFirstBit( + (nonPreservedFirst && (mask & ~preservedGP) != 0) ? (mask & ~preservedGP) : mask); + ASMJIT_ASSERT(idx != INVALID_VALUE); + } + // Then find the allocated and later spill. + else + { + idx = Util::findFirstBit(regMask & _state.usedGP); + ASMJIT_ASSERT(idx != INVALID_VALUE); + + // Spill register we need. + spillCandidate = _state.gp[idx]; + + // Jump to spill part of allocation. + goto L_Spill; + } + } + + // Home register code. + if (idx == INVALID_VALUE && home != INVALID_VALUE) + { + if ((_state.usedGP & (1U << home)) == 0) idx = home; + } + + // We start from 1, because EAX/RAX register is sometimes explicitly + // needed. So we trying to prevent reallocation in near future. 
+ if (idx == INVALID_VALUE) + { + for (i = 1, mask = (1 << i); i < REG_NUM_GP; i++, mask <<= 1) + { + if ((_state.usedGP & mask) == 0 && (i != REG_INDEX_EBP || _allocableEBP) && (i != REG_INDEX_ESP)) + { + // Convenience to alloc non-preserved first or non-preserved last. + if (nonPreservedFirst) + { + if (idx != INVALID_VALUE && (preservedGP & mask) != 0) continue; + idx = i; + // If current register is preserved, we should try to find different + // one that is not. This can save one push / pop in prolog / epilog. + if ((preservedGP & mask) == 0) break; + } + else + { + if (idx != INVALID_VALUE && (preservedGP & mask) == 0) continue; + idx = i; + // The opposite. + if ((preservedGP & mask) != 0) break; + } + } + } + } + + // If not found, try EAX/RAX. + if (idx == INVALID_VALUE && (_state.usedGP & 1) == 0) + { + idx = REG_INDEX_EAX; + } + + // -------------------------------------------------------------------------- + // [Spill] + // -------------------------------------------------------------------------- + + // If register is still not found, spill other variable. + if (idx == INVALID_VALUE) + { + if (spillCandidate == NULL) + { + spillCandidate = _getSpillCandidateGP(); + } + + // Spill candidate not found? + if (spillCandidate == NULL) + { + _compiler->setError(ERROR_NOT_ENOUGH_REGISTERS); + return; + } + +L_Spill: + + // Prevented variables can't be spilled. _getSpillCandidate() never returns + // prevented variables, but when jumping to L_spill it can happen. 
+ if (spillCandidate->workOffset == _currentOffset) + { + _compiler->setError(ERROR_REGISTERS_OVERLAP); + return; + } + + idx = spillCandidate->registerIndex; + spillGPVar(spillCandidate); + } + + // -------------------------------------------------------------------------- + // [Alloc] + // -------------------------------------------------------------------------- + + if (vdata->state == VARIABLE_STATE_MEMORY && (vflags & VARIABLE_ALLOC_READ) != 0) + { + emitLoadVar(vdata, idx); + } + + // Update VarData. + vdata->state = VARIABLE_STATE_REGISTER; + vdata->registerIndex = idx; + vdata->homeRegisterIndex = idx; + + // Update StateData. + _allocatedVariable(vdata); +} + +void CompilerContext::saveGPVar(VarData* vdata) ASMJIT_NOTHROW +{ + // Can't save variable that isn't allocated. + ASMJIT_ASSERT(vdata->state == VARIABLE_STATE_REGISTER); + ASMJIT_ASSERT(vdata->registerIndex != INVALID_VALUE); + + uint32_t idx = vdata->registerIndex; + emitSaveVar(vdata, idx); + + // Update VarData. + vdata->changed = false; +} + +void CompilerContext::spillGPVar(VarData* vdata) ASMJIT_NOTHROW +{ + // Can't spill variable that isn't allocated. + ASMJIT_ASSERT(vdata->state == VARIABLE_STATE_REGISTER); + ASMJIT_ASSERT(vdata->registerIndex != INVALID_VALUE); + + uint32_t idx = vdata->registerIndex; + + if (vdata->changed) emitSaveVar(vdata, idx); + + // Update VarData. + vdata->registerIndex = INVALID_VALUE; + vdata->state = VARIABLE_STATE_MEMORY; + vdata->changed = false; + + // Update StateData. + _state.gp[idx] = NULL; + _freedGPRegister(idx); +} + +void CompilerContext::allocMMVar(VarData* vdata, uint32_t regMask, uint32_t vflags) ASMJIT_NOTHROW +{ + // Fix the regMask (0 or full bit-array means that any register may be used). + if (regMask == 0) regMask = Util::maskUpToIndex(REG_NUM_MM); + regMask &= Util::maskUpToIndex(REG_NUM_MM); + + // Working variables. + uint32_t i; + uint32_t mask; + + // Last register code (aka home). 
+ uint32_t home = vdata->homeRegisterIndex; + // New register code. + uint32_t idx = INVALID_VALUE; + + // Preserved MM variables. + // + // NOTE: Currently MM variables are not preserved and there is no calling + // convention known to me that does that. But on the other side it's possible + // to write such calling convention. + uint32_t preservedMM = vdata->scope->getPrototype().getPreservedMM(); + + // Spill candidate. + VarData* spillCandidate = NULL; + + // Whether to alloc non-preserved first or last. + bool nonPreservedFirst = true; + if (this->getFunction()->_isCaller) + { + nonPreservedFirst = vdata->firstCallable == NULL || + vdata->firstCallable->getOffset() >= vdata->lastEmittable->getOffset(); + } + + // -------------------------------------------------------------------------- + // [Already Allocated] + // -------------------------------------------------------------------------- + + // Go away if variable is already allocated. + if (vdata->state == VARIABLE_STATE_REGISTER) + { + uint32_t oldIndex = vdata->registerIndex; + + // Already allocated in the right register. + if (Util::maskFromIndex(oldIndex) & regMask) return; + + // Try to find unallocated register first. + mask = regMask & ~_state.usedMM; + if (mask != 0) + { + idx = Util::findFirstBit( + (nonPreservedFirst && (mask & ~preservedMM) != 0) ? mask & ~preservedMM : mask); + } + // Then find the allocated and later exchange. + else + { + idx = Util::findFirstBit(regMask & _state.usedMM); + } + ASMJIT_ASSERT(idx != INVALID_VALUE); + + VarData* other = _state.mm[idx]; + if (other) spillMMVar(other); + + emitMoveVar(vdata, idx, vflags); + _freedMMRegister(oldIndex); + _state.mm[idx] = vdata; + + // Update VarData. 
+ vdata->state = VARIABLE_STATE_REGISTER; + vdata->registerIndex = idx; + vdata->homeRegisterIndex = idx; + + _allocatedMMRegister(idx); + return; + } + + // -------------------------------------------------------------------------- + // [Find Unused MM] + // -------------------------------------------------------------------------- + + // If regMask contains restricted registers which may be used then everything + // is handled in this block. + if (regMask != Util::maskUpToIndex(REG_NUM_MM)) + { + // Try to find unallocated register first. + mask = regMask & ~_state.usedMM; + if (mask != 0) + { + idx = Util::findFirstBit( + (nonPreservedFirst && (mask & ~preservedMM) != 0) ? mask & ~preservedMM : mask); + ASMJIT_ASSERT(idx != INVALID_VALUE); + } + // Then find the allocated and later spill. + else + { + idx = Util::findFirstBit(regMask & _state.usedMM); + ASMJIT_ASSERT(idx != INVALID_VALUE); + + // Spill register we need. + spillCandidate = _state.mm[idx]; + + // Jump to spill part of allocation. + goto L_Spill; + } + } + + // Home register code. + if (idx == INVALID_VALUE && home != INVALID_VALUE) + { + if ((_state.usedMM & (1U << home)) == 0) idx = home; + } + + if (idx == INVALID_VALUE) + { + for (i = 0, mask = (1 << i); i < REG_NUM_MM; i++, mask <<= 1) + { + if ((_state.usedMM & mask) == 0) + { + // Convenience to alloc non-preserved first or non-preserved last. + if (nonPreservedFirst) + { + if (idx != INVALID_VALUE && (preservedMM & mask) != 0) continue; + idx = i; + // If current register is preserved, we should try to find different + // one that is not. This can save one push / pop in prolog / epilog. + if ((preservedMM & mask) == 0) break; + } + else + { + if (idx != INVALID_VALUE && (preservedMM & mask) == 0) continue; + idx = i; + // The opposite. 
+ if ((preservedMM & mask) != 0) break; + } + } + } + } + + // -------------------------------------------------------------------------- + // [Spill] + // -------------------------------------------------------------------------- + + // If register is still not found, spill other variable. + if (idx == INVALID_VALUE) + { + if (spillCandidate == NULL) spillCandidate = _getSpillCandidateMM(); + + // Spill candidate not found? + if (spillCandidate == NULL) + { + _compiler->setError(ERROR_NOT_ENOUGH_REGISTERS); + return; + } + +L_Spill: + + // Prevented variables can't be spilled. _getSpillCandidate() never returns + // prevented variables, but when jumping to L_spill it can happen. + if (spillCandidate->workOffset == _currentOffset) + { + _compiler->setError(ERROR_REGISTERS_OVERLAP); + return; + } + + idx = spillCandidate->registerIndex; + spillMMVar(spillCandidate); + } + + // -------------------------------------------------------------------------- + // [Alloc] + // -------------------------------------------------------------------------- + + if (vdata->state == VARIABLE_STATE_MEMORY && (vflags & VARIABLE_ALLOC_READ) != 0) + { + emitLoadVar(vdata, idx); + } + + // Update VarData. + vdata->state = VARIABLE_STATE_REGISTER; + vdata->registerIndex = idx; + vdata->homeRegisterIndex = idx; + + // Update StateData. + _allocatedVariable(vdata); +} + +void CompilerContext::saveMMVar(VarData* vdata) ASMJIT_NOTHROW +{ + // Can't save variable that isn't allocated. + ASMJIT_ASSERT(vdata->state == VARIABLE_STATE_REGISTER); + ASMJIT_ASSERT(vdata->registerIndex != INVALID_VALUE); + + uint32_t idx = vdata->registerIndex; + emitSaveVar(vdata, idx); + + // Update VarData. + vdata->changed = false; +} + +void CompilerContext::spillMMVar(VarData* vdata) ASMJIT_NOTHROW +{ + // Can't spill variable that isn't allocated. 
+ ASMJIT_ASSERT(vdata->state == VARIABLE_STATE_REGISTER); + ASMJIT_ASSERT(vdata->registerIndex != INVALID_VALUE); + + uint32_t idx = vdata->registerIndex; + + if (vdata->changed) emitSaveVar(vdata, idx); + + // Update VarData. + vdata->registerIndex = INVALID_VALUE; + vdata->state = VARIABLE_STATE_MEMORY; + vdata->changed = false; + + // Update StateData. + _state.mm[idx] = NULL; + _freedMMRegister(idx); +} + +void CompilerContext::allocXMMVar(VarData* vdata, uint32_t regMask, uint32_t vflags) ASMJIT_NOTHROW +{ + // Fix the regMask (0 or full bit-array means that any register may be used). + if (regMask == 0) regMask = Util::maskUpToIndex(REG_NUM_XMM); + regMask &= Util::maskUpToIndex(REG_NUM_XMM); + + // Working variables. + uint32_t i; + uint32_t mask; + + // Last register code (aka home). + uint32_t home = vdata->homeRegisterIndex; + // New register code. + uint32_t idx = INVALID_VALUE; + + // Preserved XMM variables. + uint32_t preservedXMM = vdata->scope->getPrototype().getPreservedXMM(); + + // Spill candidate. + VarData* spillCandidate = NULL; + + // Whether to alloc non-preserved first or last. + bool nonPreservedFirst = true; + if (this->getFunction()->_isCaller) + { + nonPreservedFirst = vdata->firstCallable == NULL || + vdata->firstCallable->getOffset() >= vdata->lastEmittable->getOffset(); + } + + // -------------------------------------------------------------------------- + // [Already Allocated] + // -------------------------------------------------------------------------- + + // Go away if variable is already allocated. + if (vdata->state == VARIABLE_STATE_REGISTER) + { + uint32_t oldIndex = vdata->registerIndex; + + // Already allocated in the right register. + if (Util::maskFromIndex(oldIndex) & regMask) return; + + // Try to find unallocated register first. + mask = regMask & ~_state.usedXMM; + if (mask != 0) + { + idx = Util::findFirstBit( + (nonPreservedFirst && (mask & ~preservedXMM) != 0) ? 
mask & ~preservedXMM : mask); + } + // Then find the allocated and later exchange. + else + { + idx = Util::findFirstBit(regMask & _state.usedXMM); + } + ASMJIT_ASSERT(idx != INVALID_VALUE); + + VarData* other = _state.xmm[idx]; + if (other) spillXMMVar(other); + + emitMoveVar(vdata, idx, vflags); + _freedXMMRegister(oldIndex); + _state.xmm[idx] = vdata; + + // Update VarData. + vdata->state = VARIABLE_STATE_REGISTER; + vdata->registerIndex = idx; + vdata->homeRegisterIndex = idx; + + _allocatedXMMRegister(idx); + return; + } + + // -------------------------------------------------------------------------- + // [Find Unused XMM] + // -------------------------------------------------------------------------- + + // If regMask contains restricted registers which may be used then everything + // is handled in this block. + if (regMask != Util::maskUpToIndex(REG_NUM_XMM)) + { + // Try to find unallocated register first. + mask = regMask & ~_state.usedXMM; + if (mask != 0) + { + idx = Util::findFirstBit( + (nonPreservedFirst && (mask & ~preservedXMM) != 0) ? mask & ~preservedXMM : mask); + ASMJIT_ASSERT(idx != INVALID_VALUE); + } + // Then find the allocated and later spill. + else + { + idx = Util::findFirstBit(regMask & _state.usedXMM); + ASMJIT_ASSERT(idx != INVALID_VALUE); + + // Spill register we need. + spillCandidate = _state.xmm[idx]; + + // Jump to spill part of allocation. + goto L_Spill; + } + } + + // Home register code. + if (idx == INVALID_VALUE && home != INVALID_VALUE) + { + if ((_state.usedXMM & (1U << home)) == 0) idx = home; + } + + if (idx == INVALID_VALUE) + { + for (i = 0, mask = (1 << i); i < REG_NUM_XMM; i++, mask <<= 1) + { + if ((_state.usedXMM & mask) == 0) + { + // Convenience to alloc non-preserved first or non-preserved last. + if (nonPreservedFirst) + { + if (idx != INVALID_VALUE && (preservedXMM & mask) != 0) continue; + idx = i; + // If current register is preserved, we should try to find different + // one that is not. 
This can save one push / pop in prolog / epilog. + if ((preservedXMM & mask) == 0) break; + } + else + { + if (idx != INVALID_VALUE && (preservedXMM & mask) == 0) continue; + idx = i; + // The opposite. + if ((preservedXMM & mask) != 0) break; + } + } + } + } + + // -------------------------------------------------------------------------- + // [Spill] + // -------------------------------------------------------------------------- + + // If register is still not found, spill other variable. + if (idx == INVALID_VALUE) + { + if (spillCandidate == NULL) spillCandidate = _getSpillCandidateXMM(); + + // Spill candidate not found? + if (spillCandidate == NULL) + { + _compiler->setError(ERROR_NOT_ENOUGH_REGISTERS); + return; + } + +L_Spill: + + // Prevented variables can't be spilled. _getSpillCandidate() never returns + // prevented variables, but when jumping to L_spill it can happen. + if (spillCandidate->workOffset == _currentOffset) + { + _compiler->setError(ERROR_REGISTERS_OVERLAP); + return; + } + + idx = spillCandidate->registerIndex; + spillXMMVar(spillCandidate); + } + + // -------------------------------------------------------------------------- + // [Alloc] + // -------------------------------------------------------------------------- + + if (vdata->state == VARIABLE_STATE_MEMORY && (vflags & VARIABLE_ALLOC_READ) != 0) + { + emitLoadVar(vdata, idx); + } + + // Update VarData. + vdata->state = VARIABLE_STATE_REGISTER; + vdata->registerIndex = idx; + vdata->homeRegisterIndex = idx; + + // Update StateData. + _allocatedVariable(vdata); +} + +void CompilerContext::saveXMMVar(VarData* vdata) ASMJIT_NOTHROW +{ + // Can't save variable that isn't allocated. + ASMJIT_ASSERT(vdata->state == VARIABLE_STATE_REGISTER); + ASMJIT_ASSERT(vdata->registerIndex != INVALID_VALUE); + + uint32_t idx = vdata->registerIndex; + emitSaveVar(vdata, idx); + + // Update VarData. 
+ vdata->changed = false; +} + +void CompilerContext::spillXMMVar(VarData* vdata) ASMJIT_NOTHROW +{ + // Can't spill variable that isn't allocated. + ASMJIT_ASSERT(vdata->state == VARIABLE_STATE_REGISTER); + ASMJIT_ASSERT(vdata->registerIndex != INVALID_VALUE); + + uint32_t idx = vdata->registerIndex; + + if (vdata->changed) emitSaveVar(vdata, idx); + + // Update VarData. + vdata->registerIndex = INVALID_VALUE; + vdata->state = VARIABLE_STATE_MEMORY; + vdata->changed = false; + + // Update StateData. + _state.xmm[idx] = NULL; + _freedXMMRegister(idx); +} + +void CompilerContext::emitLoadVar(VarData* vdata, uint32_t regIndex) ASMJIT_NOTHROW +{ + Mem m = _getVarMem(vdata); + + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + _compiler->emit(INST_MOV, gpd(regIndex), m); + if (_emitComments) goto addComment; + break; +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + _compiler->emit(INST_MOV, gpq(regIndex), m); + if (_emitComments) goto addComment; + break; +#endif // ASMJIT_X64 + + case VARIABLE_TYPE_X87: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_X87_1D: + // TODO: X87 VARIABLES NOT IMPLEMENTED. 
+ break; + + case VARIABLE_TYPE_MM: + _compiler->emit(INST_MOVQ, mm(regIndex), m); + if (_emitComments) goto addComment; + break; + + case VARIABLE_TYPE_XMM: + _compiler->emit(INST_MOVDQA, xmm(regIndex), m); + if (_emitComments) goto addComment; + break; + case VARIABLE_TYPE_XMM_1F: + _compiler->emit(INST_MOVSS, xmm(regIndex), m); + if (_emitComments) goto addComment; + break; + case VARIABLE_TYPE_XMM_1D: + _compiler->emit(INST_MOVSD, xmm(regIndex), m); + if (_emitComments) goto addComment; + break; + case VARIABLE_TYPE_XMM_4F: + _compiler->emit(INST_MOVAPS, xmm(regIndex), m); + if (_emitComments) goto addComment; + break; + case VARIABLE_TYPE_XMM_2D: + _compiler->emit(INST_MOVAPD, xmm(regIndex), m); + if (_emitComments) goto addComment; + break; + } + return; + +addComment: + _compiler->getCurrentEmittable()->setCommentF("Alloc %s", vdata->name); +} + +void CompilerContext::emitSaveVar(VarData* vdata, uint32_t regIndex) ASMJIT_NOTHROW +{ + // Caller must ensure that variable is allocated. + ASMJIT_ASSERT(regIndex != INVALID_VALUE); + + Mem m = _getVarMem(vdata); + + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + _compiler->emit(INST_MOV, m, gpd(regIndex)); + if (_emitComments) goto addComment; + break; +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + _compiler->emit(INST_MOV, m, gpq(regIndex)); + if (_emitComments) goto addComment; + break; +#endif // ASMJIT_X64 + + case VARIABLE_TYPE_X87: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_X87_1D: + // TODO: X87 VARIABLES NOT IMPLEMENTED. 
+ break; + + case VARIABLE_TYPE_MM: + _compiler->emit(INST_MOVQ, m, mm(regIndex)); + if (_emitComments) goto addComment; + break; + + case VARIABLE_TYPE_XMM: + _compiler->emit(INST_MOVDQA, m, xmm(regIndex)); + if (_emitComments) goto addComment; + break; + case VARIABLE_TYPE_XMM_1F: + _compiler->emit(INST_MOVSS, m, xmm(regIndex)); + if (_emitComments) goto addComment; + break; + case VARIABLE_TYPE_XMM_1D: + _compiler->emit(INST_MOVSD, m, xmm(regIndex)); + if (_emitComments) goto addComment; + break; + case VARIABLE_TYPE_XMM_4F: + _compiler->emit(INST_MOVAPS, m, xmm(regIndex)); + if (_emitComments) goto addComment; + break; + case VARIABLE_TYPE_XMM_2D: + _compiler->emit(INST_MOVAPD, m, xmm(regIndex)); + if (_emitComments) goto addComment; + break; + } + return; + +addComment: + _compiler->getCurrentEmittable()->setCommentF("Spill %s", vdata->name); +} + +void CompilerContext::emitMoveVar(VarData* vdata, uint32_t regIndex, uint32_t vflags) ASMJIT_NOTHROW +{ + // Caller must ensure that variable is allocated. + ASMJIT_ASSERT(vdata->registerIndex != INVALID_VALUE); + + if ((vflags & VARIABLE_ALLOC_READ) == 0) return; + + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + _compiler->emit(INST_MOV, gpd(regIndex), gpd(vdata->registerIndex)); + break; +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + _compiler->emit(INST_MOV, gpq(regIndex), gpq(vdata->registerIndex)); + break; +#endif // ASMJIT_X64 + + case VARIABLE_TYPE_X87: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_X87_1D: + // TODO: X87 VARIABLES NOT IMPLEMENTED. 
+ break; + + case VARIABLE_TYPE_MM: + _compiler->emit(INST_MOVQ, mm(regIndex), mm(vdata->registerIndex)); + break; + + case VARIABLE_TYPE_XMM: + _compiler->emit(INST_MOVDQA, xmm(regIndex), xmm(vdata->registerIndex)); + break; + case VARIABLE_TYPE_XMM_1F: + _compiler->emit(INST_MOVSS, xmm(regIndex), xmm(vdata->registerIndex)); + break; + case VARIABLE_TYPE_XMM_1D: + _compiler->emit(INST_MOVSD, xmm(regIndex), xmm(vdata->registerIndex)); + break; + case VARIABLE_TYPE_XMM_4F: + _compiler->emit(INST_MOVAPS, xmm(regIndex), xmm(vdata->registerIndex)); + break; + case VARIABLE_TYPE_XMM_2D: + _compiler->emit(INST_MOVAPD, xmm(regIndex), xmm(vdata->registerIndex)); + break; + } +} + +void CompilerContext::emitExchangeVar(VarData* vdata, uint32_t regIndex, uint32_t vflags, VarData* other) ASMJIT_NOTHROW +{ + // Caller must ensure that variable is allocated. + ASMJIT_ASSERT(vdata->registerIndex != INVALID_VALUE); + + // If other is not valid then we can just emit MOV (or other similar instruction). + if (other == NULL) + { + emitMoveVar(vdata, regIndex, vflags); + return; + } + + // If we need to alloc for write-only operation then we can move other + // variable away instead of exchanging them. + if ((vflags & VARIABLE_ALLOC_READ) == 0) + { + emitMoveVar(other, vdata->registerIndex, VARIABLE_ALLOC_READ); + return; + } + + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + _compiler->emit(INST_XCHG, gpd(regIndex), gpd(vdata->registerIndex)); + break; +#if defined(ASMJIT_X64) + case VARIABLE_TYPE_GPQ: + _compiler->emit(INST_XCHG, gpq(regIndex), gpq(vdata->registerIndex)); + break; +#endif // ASMJIT_X64 + + case VARIABLE_TYPE_X87: + case VARIABLE_TYPE_X87_1F: + case VARIABLE_TYPE_X87_1D: + // TODO: X87 VARIABLES NOT IMPLEMENTED. + break; + + // NOTE: MM and XMM registers shoudln't be exchanged using this way, it's + // correct, but it sucks. 
+ + case VARIABLE_TYPE_MM: + { + MMReg a = mm(regIndex); + MMReg b = mm(vdata->registerIndex); + + _compiler->emit(INST_PXOR, a, b); + _compiler->emit(INST_PXOR, b, a); + _compiler->emit(INST_PXOR, a, b); + break; + } + + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + { + XMMReg a = xmm(regIndex); + XMMReg b = xmm(vdata->registerIndex); + + _compiler->emit(INST_XORPS, a, b); + _compiler->emit(INST_XORPS, b, a); + _compiler->emit(INST_XORPS, a, b); + break; + } + + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + { + XMMReg a = xmm(regIndex); + XMMReg b = xmm(vdata->registerIndex); + + _compiler->emit(INST_XORPD, a, b); + _compiler->emit(INST_XORPD, b, a); + _compiler->emit(INST_XORPD, a, b); + break; + } + + case VARIABLE_TYPE_XMM: + { + XMMReg a = xmm(regIndex); + XMMReg b = xmm(vdata->registerIndex); + + _compiler->emit(INST_PXOR, a, b); + _compiler->emit(INST_PXOR, b, a); + _compiler->emit(INST_PXOR, a, b); + break; + } + } +} + +void CompilerContext::_postAlloc(VarData* vdata, uint32_t vflags) ASMJIT_NOTHROW +{ + if (vflags & VARIABLE_ALLOC_WRITE) vdata->changed = true; +} + +void CompilerContext::_markMemoryUsed(VarData* vdata) ASMJIT_NOTHROW +{ + if (vdata->homeMemoryData != NULL) return; + + VarMemBlock* mem = _allocMemBlock(vdata->size); + if (!mem) return; + + vdata->homeMemoryData = mem; +} + +Mem CompilerContext::_getVarMem(VarData* vdata) ASMJIT_NOTHROW +{ + Mem m; + m._mem.id = vdata->id; + if (!vdata->isMemArgument) m._mem.displacement = _adjustESP; + + _markMemoryUsed(vdata); + return m; +} + +static int32_t getSpillScore(VarData* v, uint32_t currentOffset) +{ + int32_t score = 0; + + ASMJIT_ASSERT(v->lastEmittable != NULL); + uint32_t lastOffset = v->lastEmittable->getOffset(); + + if (lastOffset >= currentOffset) + score += (int32_t)(lastOffset - currentOffset); + + // Each write access decreases probability of spill. 
+ score -= (int32_t)v->registerWriteCount + (int32_t)v->registerRWCount; + // Each read-only access increases probability of spill. + score += (int32_t)v->registerReadCount; + + // Each memory access increases probability of spill. + score += (int32_t)v->memoryWriteCount + (int32_t)v->memoryRWCount; + score += (int32_t)v->memoryReadCount; + + return score; +} + +VarData* CompilerContext::_getSpillCandidateGP() ASMJIT_NOTHROW +{ + return _getSpillCandidateGeneric(_state.gp, REG_NUM_GP); +} + +VarData* CompilerContext::_getSpillCandidateMM() ASMJIT_NOTHROW +{ + return _getSpillCandidateGeneric(_state.mm, REG_NUM_MM); +} + +VarData* CompilerContext::_getSpillCandidateXMM() ASMJIT_NOTHROW +{ + return _getSpillCandidateGeneric(_state.xmm, REG_NUM_XMM); +} + +VarData* CompilerContext::_getSpillCandidateGeneric(VarData** varArray, uint32_t count) ASMJIT_NOTHROW +{ + uint32_t i; + + VarData* candidate = NULL; + uint32_t candidatePriority = 0; + int32_t candidateScore = 0; + + uint32_t currentOffset = _compiler->getCurrentEmittable()->getOffset(); + + for (i = 0; i < count; i++) + { + // Get variable. + VarData* vdata = varArray[i]; + + // Never spill variables needed for next instruction. + if (vdata == NULL || vdata->workOffset == _currentOffset) continue; + + uint32_t variablePriority = vdata->priority; + int32_t variableScore = getSpillScore(vdata, currentOffset); + + if ((candidate == NULL) || + (variablePriority > candidatePriority) || + (variablePriority == candidatePriority && variableScore > candidateScore)) + { + candidate = vdata; + candidatePriority = variablePriority; + candidateScore = variableScore; + } + } + + return candidate; +} + +void CompilerContext::_addActive(VarData* vdata) ASMJIT_NOTHROW +{ + // Never call with variable that is already in active list. 
+ ASMJIT_ASSERT(vdata->nextActive == NULL); + ASMJIT_ASSERT(vdata->prevActive == NULL); + + if (_active == NULL) + { + vdata->nextActive = vdata; + vdata->prevActive = vdata; + + _active = vdata; + } + else + { + VarData* vlast = _active->prevActive; + + vlast->nextActive = vdata; + _active->prevActive = vdata; + + vdata->nextActive = _active; + vdata->prevActive = vlast; + } +} + +void CompilerContext::_freeActive(VarData* vdata) ASMJIT_NOTHROW +{ + VarData* next = vdata->nextActive; + VarData* prev = vdata->prevActive; + + if (prev == next) + { + _active = NULL; + } + else + { + if (_active == vdata) _active = next; + prev->nextActive = next; + next->prevActive = prev; + } + + vdata->nextActive = NULL; + vdata->prevActive = NULL; +} + +void CompilerContext::_freeAllActive() ASMJIT_NOTHROW +{ + if (_active == NULL) return; + + VarData* cur = _active; + for (;;) + { + VarData* next = cur->nextActive; + cur->nextActive = NULL; + cur->prevActive = NULL; + if (next == _active) break; + } + + _active = NULL; +} + +void CompilerContext::_allocatedVariable(VarData* vdata) ASMJIT_NOTHROW +{ + uint32_t idx = vdata->registerIndex; + + switch (vdata->type) + { + case VARIABLE_TYPE_GPD: + case VARIABLE_TYPE_GPQ: + _state.gp[idx] = vdata; + _allocatedGPRegister(idx); + break; + + case VARIABLE_TYPE_MM: + _state.mm[idx] = vdata; + _allocatedMMRegister(idx); + break; + + case VARIABLE_TYPE_XMM: + case VARIABLE_TYPE_XMM_1F: + case VARIABLE_TYPE_XMM_4F: + case VARIABLE_TYPE_XMM_1D: + case VARIABLE_TYPE_XMM_2D: + _state.xmm[idx] = vdata; + _allocatedXMMRegister(idx); + break; + + default: + ASMJIT_ASSERT(0); + break; + } +} + +void CompilerContext::translateOperands(Operand* operands, uint32_t count) ASMJIT_NOTHROW +{ + uint32_t i; + + // Translate variables to registers. 
+ for (i = 0; i < count; i++) + { + Operand& o = operands[i]; + + if (o.isVar()) + { + VarData* vdata = _compiler->_getVarData(o.getId()); + ASMJIT_ASSERT(vdata != NULL); + + o._reg.op = OPERAND_REG; + o._reg.code |= vdata->registerIndex; + } + else if (o.isMem()) + { + if ((o.getId() & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + // Memory access. We just increment here actual displacement. + VarData* vdata = _compiler->_getVarData(o.getId()); + ASMJIT_ASSERT(vdata != NULL); + + o._mem.displacement += vdata->isMemArgument + ? _argumentsActualDisp + : _variablesActualDisp; + // NOTE: This is not enough, variable position will be patched later + // by CompilerContext::_patchMemoryOperands(). + } + else if ((o._mem.base & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(o._mem.base); + ASMJIT_ASSERT(vdata != NULL); + + o._mem.base = vdata->registerIndex; + } + + if ((o._mem.index & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(o._mem.index); + ASMJIT_ASSERT(vdata != NULL); + + o._mem.index = vdata->registerIndex; + } + } + } +} + +void CompilerContext::addBackwardCode(EJmp* from) ASMJIT_NOTHROW +{ + _backCode.append(from); +} + +void CompilerContext::addForwardJump(EJmp* inst) ASMJIT_NOTHROW +{ + ForwardJumpData* j = + reinterpret_cast<ForwardJumpData*>(_zone.zalloc(sizeof(ForwardJumpData))); + if (j == NULL) { _compiler->setError(ERROR_NO_HEAP_MEMORY); return; } + + j->inst = inst; + j->state = _saveState(); + j->next = _forwardJumps; + _forwardJumps = j; +} + +StateData* CompilerContext::_saveState() ASMJIT_NOTHROW +{ + // Get count of variables stored in memory. + uint32_t memVarsCount = 0; + VarData* cur = _active; + if (cur) + { + do { + if (cur->state == VARIABLE_STATE_MEMORY) memVarsCount++; + cur = cur->nextActive; + } while (cur != _active); + } + + // Alloc StateData structure (using zone allocator) and copy current state into it. 
+ StateData* state = _compiler->_newStateData(memVarsCount); + memcpy(state, &_state, sizeof(StateData)); + + // Clear changed flags. + state->changedGP = 0; + state->changedMM = 0; + state->changedXMM = 0; + + uint i; + uint mask; + + // Save variables stored in REGISTERs and CHANGE flag. + for (i = 0, mask = 1; i < REG_NUM_GP; i++, mask <<= 1) + { + if (state->gp[i] && state->gp[i]->changed) state->changedGP |= mask; + } + + for (i = 0, mask = 1; i < REG_NUM_MM; i++, mask <<= 1) + { + if (state->mm[i] && state->mm[i]->changed) state->changedMM |= mask; + } + + for (i = 0, mask = 1; i < REG_NUM_XMM; i++, mask <<= 1) + { + if (state->xmm[i] && state->xmm[i]->changed) state->changedXMM |= mask; + } + + // Save variables stored in MEMORY. + state->memVarsCount = memVarsCount; + memVarsCount = 0; + + cur = _active; + if (cur) + { + do { + if (cur->state == VARIABLE_STATE_MEMORY) state->memVarsData[memVarsCount++] = cur; + cur = cur->nextActive; + } while (cur != _active); + } + + // Finished. + return state; +} + +void CompilerContext::_assignState(StateData* state) ASMJIT_NOTHROW +{ + Compiler* compiler = getCompiler(); + + memcpy(&_state, state, sizeof(StateData)); + _state.memVarsCount = 0; + + uint i, mask; + VarData* vdata; + + // Unuse all variables first. + vdata = _active; + if (vdata) + { + do { + vdata->state = VARIABLE_STATE_UNUSED; + vdata = vdata->nextActive; + } while (vdata != _active); + } + + // Assign variables stored in memory which are not unused. + for (i = 0; i < state->memVarsCount; i++) + { + state->memVarsData[i]->state = VARIABLE_STATE_MEMORY; + } + + // Assign allocated variables. 
+ for (i = 0, mask = 1; i < REG_NUM_GP; i++, mask <<= 1) + { + if ((vdata = _state.gp[i]) != NULL) + { + vdata->state = VARIABLE_STATE_REGISTER; + vdata->registerIndex = i; + vdata->changed = (_state.changedGP & mask) != 0; + } + } + + for (i = 0, mask = 1; i < REG_NUM_MM; i++, mask <<= 1) + { + if ((vdata = _state.mm[i]) != NULL) + { + vdata->state = VARIABLE_STATE_REGISTER; + vdata->registerIndex = i; + vdata->changed = (_state.changedMM & mask) != 0; + } + } + + for (i = 0, mask = 1; i < REG_NUM_XMM; i++, mask <<= 1) + { + if ((vdata = _state.xmm[i]) != NULL) + { + vdata->state = VARIABLE_STATE_REGISTER; + vdata->registerIndex = i; + vdata->changed = (_state.changedXMM & mask) != 0; + } + } +} + +void CompilerContext::_restoreState(StateData* state, uint32_t targetOffset) ASMJIT_NOTHROW +{ + // 16 + 8 + 16 = GP + MMX + XMM registers. + static const uint STATE_REGS_COUNT = 16 + 8 + 16; + + StateData* fromState = &_state; + StateData* toState = state; + + // No change, rare... + if (fromState == toState) return; + + uint base; + uint i; + + // -------------------------------------------------------------------------- + // Set target state to all variables. vdata->tempInt is target state in this + // function. + // -------------------------------------------------------------------------- + + { + // UNUSED. + VarData* vdata = _active; + if (vdata) + { + do { + vdata->tempInt = VARIABLE_STATE_UNUSED; + vdata = vdata->nextActive; + } while (vdata != _active); + } + + // MEMORY. + for (i = 0; i < toState->memVarsCount; i++) + { + toState->memVarsData[i]->tempInt = VARIABLE_STATE_MEMORY; + } + + // REGISTER. + for (i = 0; i < StateData::NUM_REGS; i++) + { + if ((vdata = toState->regs[i]) != NULL) vdata->tempInt = VARIABLE_STATE_REGISTER; + } + } + + // -------------------------------------------------------------------------- + // [GP-Registers Switch] + // -------------------------------------------------------------------------- + + // TODO. 
+#if 0 + for (i = 0; i < REG_NUM_GP; i++) + { + VarData* fromVar = fromState->gp[i]; + VarData* toVar = toState->gp[i]; + + if (fromVar != toVar) + { + if (fromVar != NULL) + { + if (toVar != NULL) + { + if (fromState->gp[to + } + else + { + // It is possible that variable that was saved in state currently not + // exists (tempInt is target scope!). + if (fromVar->tempInt == VARIABLE_STATE_UNUSED) + { + unuseVar(fromVar, VARIABLE_STATE_UNUSED); + } + else + { + spillVar(fromVar); + } + } + } + } + else if (fromVar != NULL) + { + uint32_t mask = Util::maskFromIndex(i); + // Variables are the same, we just need to compare changed flags. + if ((fromState->changedGP & mask) && !(toState->changedGP & mask)) saveVar(fromVar); + } + } +#endif + + // Spill. + for (base = 0, i = 0; i < STATE_REGS_COUNT; i++) + { + // Change the base offset (from base offset the register index can be + // calculated). + if (i == 16 || i == 16 + 8) base = i; + uint32_t regIndex = i - base; + + VarData* fromVar = fromState->regs[i]; + VarData* toVar = toState->regs[i]; + + if (fromVar != toVar) + { + + // Spill the register. + if (fromVar != NULL) + { + // It is possible that variable that was saved in state currently not + // exists (tempInt is target scope!). + if (fromVar->tempInt == VARIABLE_STATE_UNUSED) + { + unuseVar(fromVar, VARIABLE_STATE_UNUSED); + } + else + { + spillVar(fromVar); + } + } + } + else if (fromVar != NULL) + { + uint32_t mask = Util::maskFromIndex(regIndex); + // Variables are the same, we just need to compare changed flags. + if ((fromState->changedGP & mask) && !(toState->changedGP & mask)) + { + saveVar(fromVar); + } + } + } + + // Alloc. 
+ for (base = 0, i = 0; i < STATE_REGS_COUNT; i++) + { + if (i == 16 || i == 24) base = i; + + VarData* fromVar = fromState->regs[i]; + VarData* toVar = toState->regs[i]; + + if (fromVar != toVar) + { + uint32_t regIndex = i - base; + + // Alloc register + if (toVar != NULL) + { + allocVar(toVar, Util::maskFromIndex(regIndex), VARIABLE_ALLOC_READ); + } + } + + // TODO: + //if (toVar) + //{ + // toVar->changed = to->changed; + //} + } + + // -------------------------------------------------------------------------- + // Update used masks. + // -------------------------------------------------------------------------- + + _state.usedGP = state->usedGP; + _state.usedMM = state->usedMM; + _state.usedXMM = state->usedXMM; + + // -------------------------------------------------------------------------- + // Update changed masks and cleanup. + // -------------------------------------------------------------------------- + + { + VarData* vdata = _active; + if (vdata) + { + do { + if (vdata->tempInt != VARIABLE_STATE_REGISTER) + { + vdata->state = (int)vdata->tempInt; + vdata->changed = false; + } + + vdata->tempInt = 0; + vdata = vdata->nextActive; + } while (vdata != _active); + } + } +} + +VarMemBlock* CompilerContext::_allocMemBlock(uint32_t size) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(size != 0); + + // First try to find mem blocks. + VarMemBlock* mem = _memFree; + VarMemBlock* prev = NULL; + + while (mem) + { + VarMemBlock* next = mem->nextFree; + + if (mem->size == size) + { + if (prev) + prev->nextFree = next; + else + _memFree = next; + + mem->nextFree = NULL; + return mem; + } + + prev = mem; + mem = next; + } + + // Never mind, create new. 
+ mem = reinterpret_cast<VarMemBlock*>(_zone.zalloc(sizeof(VarMemBlock))); + if (!mem) + { + _compiler->setError(ERROR_NO_HEAP_MEMORY); + return NULL; + } + + mem->offset = 0; + mem->size = size; + + mem->nextUsed = _memUsed; + mem->nextFree = NULL; + + _memUsed = mem; + + switch (size) + { + case 16: _mem16BlocksCount++; break; + case 8: _mem8BlocksCount++; break; + case 4: _mem4BlocksCount++; break; + } + + return mem; +} + +void CompilerContext::_freeMemBlock(VarMemBlock* mem) ASMJIT_NOTHROW +{ + // Add mem to free blocks. + mem->nextFree = _memFree; + _memFree = mem; +} + +void CompilerContext::_allocMemoryOperands() ASMJIT_NOTHROW +{ + VarMemBlock* mem; + + // Variables are allocated in this order: + // 1. 16-byte variables. + // 2. 8-byte variables. + // 3. 4-byte variables. + // 4. All others. + + uint32_t start16 = 0; + uint32_t start8 = start16 + _mem16BlocksCount * 16; + uint32_t start4 = start8 + _mem8BlocksCount * 8; + uint32_t startX = (start4 + _mem4BlocksCount * 4 + 15) & ~15; + + for (mem = _memUsed; mem; mem = mem->nextUsed) + { + uint32_t size = mem->size; + uint32_t offset; + + switch (size) + { + case 16: + offset = start16; + start16 += 16; + break; + + case 8: + offset = start8; + start8 += 8; + break; + + case 4: + offset = start4; + start4 += 4; + break; + + default: + // Align to 16 bytes if size is 16 or more. 
+ if (size >= 16) + { + size = (size + 15) & ~15; + startX = (startX + 15) & ~15; + } + offset = startX; + startX += size; + break; + } + + mem->offset = (int32_t)offset; + _memBytesTotal += size; + } +} + +void CompilerContext::_patchMemoryOperands(Emittable* start, Emittable* stop) ASMJIT_NOTHROW +{ + Emittable* cur; + + for (cur = start;; cur = cur->getNext()) + { + if (cur->getType() == EMITTABLE_INSTRUCTION) + { + Mem* mem = reinterpret_cast<EInstruction*>(cur)->_memOp; + + if (mem && (mem->_mem.id & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_VAR) + { + VarData* vdata = _compiler->_getVarData(mem->_mem.id); + ASMJIT_ASSERT(vdata != NULL); + + if (vdata->isMemArgument) + { + mem->_mem.base = _argumentsBaseReg; + mem->_mem.displacement += vdata->homeMemoryOffset; + mem->_mem.displacement += _argumentsBaseOffset; + } + else + { + VarMemBlock* mb = reinterpret_cast<VarMemBlock*>(vdata->homeMemoryData); + ASMJIT_ASSERT(mb != NULL); + + mem->_mem.base = _variablesBaseReg; + mem->_mem.displacement += mb->offset; + mem->_mem.displacement += _variablesBaseOffset; + } + } + } + if (cur == stop) break; + } +} + +// ============================================================================ +// [AsmJit::CompilerUtil] +// ============================================================================ + +bool CompilerUtil::isStack16ByteAligned() +{ + // Stack is always aligned to 16-bytes when using 64-bit OS. + bool result = (sizeof(sysuint_t) == 8); + + // Modern Linux, APPLE and UNIX guarantees stack alignment to 16 bytes by + // default. I'm really not sure about all UNIX operating systems, because + // 16-byte alignment is an addition to the older specification. 
+#if (defined(__linux__) || \ + defined(__linux) || \ + defined(linux) || \ + defined(__unix__) || \ + defined(__FreeBSD__) || \ + defined(__NetBSD__) || \ + defined(__OpenBSD__) || \ + defined(__DARWIN__) || \ + defined(__APPLE__) ) + result = true; +#endif // __linux__ + + return result; +} + +// ============================================================================ +// [AsmJit::CompilerCore - Construction / Destruction] +// ============================================================================ + +CompilerCore::CompilerCore(CodeGenerator* codeGenerator) ASMJIT_NOTHROW : + _codeGenerator(codeGenerator != NULL ? codeGenerator : CodeGenerator::getGlobal()), + _zone(16384 - sizeof(Zone::Chunk) - 32), + _logger(NULL), + _error(0), + _properties((1 << PROPERTY_OPTIMIZE_ALIGN)), + _emitOptions(0), + _finished(false), + _first(NULL), + _last(NULL), + _current(NULL), + _function(NULL), + _varNameId(0), + _cc(NULL) +{ +} + +CompilerCore::~CompilerCore() ASMJIT_NOTHROW +{ + free(); +} + +// ============================================================================ +// [AsmJit::CompilerCore - Logging] +// ============================================================================ + +void CompilerCore::setLogger(Logger* logger) ASMJIT_NOTHROW +{ + _logger = logger; +} + +// ============================================================================ +// [AsmJit::CompilerCore - Error Handling] +// ============================================================================ + +void CompilerCore::setError(uint32_t error) ASMJIT_NOTHROW +{ + _error = error; + if (_error == ERROR_NONE) return; + + if (_logger) + { + _logger->logFormat("*** COMPILER ERROR: %s (%u).\n", + getErrorCodeAsString(error), + (unsigned int)error); + } +} + +// ============================================================================ +// [AsmJit::CompilerCore - Properties] +// ============================================================================ + +uint32_t 
CompilerCore::getProperty(uint32_t propertyId) +{ + return (_properties & (1 << propertyId)) != 0; +} + +void CompilerCore::setProperty(uint32_t propertyId, uint32_t value) +{ + if (value) + _properties |= (1 << propertyId); + else + _properties &= ~(1 << propertyId); +} + +// ============================================================================ +// [AsmJit::CompilerCore - Buffer] +// ============================================================================ + +void CompilerCore::clear() ASMJIT_NOTHROW +{ + _finished = false; + + delAll(_first); + _first = NULL; + _last = NULL; + _current = NULL; + + _zone.freeAll(); + _targetData.clear(); + _varData.clear(); + + _cc = NULL; + + if (_error) setError(ERROR_NONE); +} + +void CompilerCore::free() ASMJIT_NOTHROW +{ + clear(); + + _targetData.free(); + _varData.free(); +} + +// ============================================================================ +// [AsmJit::CompilerCore - Emittables] +// ============================================================================ + +void CompilerCore::addEmittable(Emittable* emittable) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(emittable != NULL); + ASMJIT_ASSERT(emittable->_prev == NULL); + ASMJIT_ASSERT(emittable->_next == NULL); + + if (_current == NULL) + { + if (!_first) + { + _first = emittable; + _last = emittable; + } + else + { + emittable->_next = _first; + _first->_prev = emittable; + _first = emittable; + } + } + else + { + Emittable* prev = _current; + Emittable* next = _current->_next; + + emittable->_prev = prev; + emittable->_next = next; + + prev->_next = emittable; + if (next) + next->_prev = emittable; + else + _last = emittable; + } + + _current = emittable; +} + +void CompilerCore::addEmittableAfter(Emittable* emittable, Emittable* ref) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(emittable != NULL); + ASMJIT_ASSERT(emittable->_prev == NULL); + ASMJIT_ASSERT(emittable->_next == NULL); + ASMJIT_ASSERT(ref != NULL); + + Emittable* prev = ref; + Emittable* next = 
ref->_next; + + emittable->_prev = prev; + emittable->_next = next; + + prev->_next = emittable; + if (next) + next->_prev = emittable; + else + _last = emittable; +} + +void CompilerCore::removeEmittable(Emittable* emittable) ASMJIT_NOTHROW +{ + Emittable* prev = emittable->_prev; + Emittable* next = emittable->_next; + + if (_first == emittable) { _first = next; } else { prev->_next = next; } + if (_last == emittable) { _last = prev; } else { next->_prev = prev; } + + emittable->_prev = NULL; + emittable->_next = NULL; + + if (emittable == _current) _current = prev; +} + +Emittable* CompilerCore::setCurrentEmittable(Emittable* current) ASMJIT_NOTHROW +{ + Emittable* old = _current; + _current = current; + return old; +} + +// ============================================================================ +// [AsmJit::CompilerCore - Logging] +// ============================================================================ + +void CompilerCore::comment(const char* fmt, ...) ASMJIT_NOTHROW +{ + char buf[128]; + char* p = buf; + + if (fmt) + { + *p++ = ';'; + *p++ = ' '; + + va_list ap; + va_start(ap, fmt); + p += vsnprintf(p, 100, fmt, ap); + va_end(ap); + } + + *p++ = '\n'; + *p = '\0'; + + addEmittable(Compiler_newObject<EComment>(this, buf)); +} + +// ============================================================================ +// [AsmJit::CompilerCore - Function Builder] +// ============================================================================ + +EFunction* CompilerCore::newFunction_( + uint32_t callingConvention, + const uint32_t* arguments, + uint32_t argumentsCount, + uint32_t returnValue) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(_function == NULL); + EFunction* f = _function = Compiler_newObject<EFunction>(this); + + f->setPrototype(callingConvention, arguments, argumentsCount, returnValue); + addEmittable(f); + + bind(f->_entryLabel); + addEmittable(f->_prolog); + + _varNameId = 0; + + f->_createVariables(); + return f; +} + +EFunction* 
CompilerCore::endFunction() ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(_function != NULL); + EFunction* f = _function; + + bind(f->_exitLabel); + addEmittable(f->_epilog); + addEmittable(f->_end); + + f->_finished = true; + _function = NULL; + + return f; +} + +// ============================================================================ +// [AsmJit::CompilerCore - EmitInstruction] +// ============================================================================ + +void CompilerCore::_emitInstruction(uint32_t code) ASMJIT_NOTHROW +{ + EInstruction* e = newInstruction(code, NULL, 0); + if (!e) return; + + addEmittable(e); + if (_cc) { e->_offset = _cc->_currentOffset; e->prepare(*_cc); } +} + +void CompilerCore::_emitInstruction(uint32_t code, const Operand* o0) ASMJIT_NOTHROW +{ + Operand* operands = reinterpret_cast<Operand*>(_zone.zalloc(1 * sizeof(Operand))); + if (!operands) return; + + operands[0] = *o0; + + EInstruction* e = newInstruction(code, operands, 1); + if (!e) return; + + addEmittable(e); + if (_cc) { e->_offset = _cc->_currentOffset; e->prepare(*_cc); } +} + +void CompilerCore::_emitInstruction(uint32_t code, const Operand* o0, const Operand* o1) ASMJIT_NOTHROW +{ + Operand* operands = reinterpret_cast<Operand*>(_zone.zalloc(2 * sizeof(Operand))); + if (!operands) return; + + operands[0] = *o0; + operands[1] = *o1; + + EInstruction* e = newInstruction(code, operands, 2); + if (!e) return; + + addEmittable(e); + if (_cc) { e->_offset = _cc->_currentOffset; e->prepare(*_cc); } +} + +void CompilerCore::_emitInstruction(uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2) ASMJIT_NOTHROW +{ + Operand* operands = reinterpret_cast<Operand*>(_zone.zalloc(3 * sizeof(Operand))); + if (!operands) return; + + operands[0] = *o0; + operands[1] = *o1; + operands[2] = *o2; + + EInstruction* e = newInstruction(code, operands, 3); + if (!e) return; + + addEmittable(e); + if (_cc) { e->_offset = _cc->_currentOffset; e->prepare(*_cc); } +} + +void 
CompilerCore::_emitInstruction(uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2, const Operand* o3) ASMJIT_NOTHROW +{ + Operand* operands = reinterpret_cast<Operand*>(_zone.zalloc(4 * sizeof(Operand))); + if (!operands) return; + + operands[0] = *o0; + operands[1] = *o1; + operands[2] = *o2; + operands[3] = *o3; + + EInstruction* e = newInstruction(code, operands, 4); + if (!e) return; + + addEmittable(e); + if (_cc) { e->_offset = _cc->_currentOffset; e->prepare(*_cc); } +} + +void CompilerCore::_emitInstruction(uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2, const Operand* o3, const Operand* o4) ASMJIT_NOTHROW +{ + Operand* operands = reinterpret_cast<Operand*>(_zone.zalloc(5 * sizeof(Operand))); + if (!operands) return; + + operands[0] = *o0; + operands[1] = *o1; + operands[2] = *o2; + operands[3] = *o3; + operands[4] = *o4; + + EInstruction* e = newInstruction(code, operands, 5); + if (!e) return; + + addEmittable(e); + if (_cc) { e->_offset = _cc->_currentOffset; e->prepare(*_cc); } +} + +void CompilerCore::_emitJcc(uint32_t code, const Label* label, uint32_t hint) ASMJIT_NOTHROW +{ + if (!hint) + { + _emitInstruction(code, label); + } + else + { + Imm imm(hint); + _emitInstruction(code, label, &imm); + } +} + +ECall* CompilerCore::_emitCall(const Operand* o0) ASMJIT_NOTHROW +{ + EFunction* fn = getFunction(); + if (!fn) { setError(ERROR_NO_FUNCTION); return NULL; } + + ECall* eCall = Compiler_newObject<ECall>(this, fn, o0); + if (!eCall) { setError(ERROR_NO_HEAP_MEMORY); return NULL; } + + addEmittable(eCall); + return eCall; +} + +void CompilerCore::_emitReturn(const Operand* first, const Operand* second) ASMJIT_NOTHROW +{ + EFunction* fn = getFunction(); + if (!fn) { setError(ERROR_NO_FUNCTION); return; } + + ERet* eRet = Compiler_newObject<ERet>(this, fn, first, second); + if (!eRet) { setError(ERROR_NO_HEAP_MEMORY); return; } + + addEmittable(eRet); +} + +// 
============================================================================ +// [AsmJit::CompilerCore - Embed] +// ============================================================================ + +void CompilerCore::embed(const void* data, sysuint_t size) ASMJIT_NOTHROW +{ + // Align length to 16 bytes. + sysuint_t alignedSize = (size + 15) & ~15; + + EData* e = + new(_zone.zalloc(sizeof(EData) - sizeof(void*) + alignedSize)) + EData(reinterpret_cast<Compiler*>(this), data, size); + addEmittable(e); +} + +// ============================================================================ +// [AsmJit::CompilerCore - Align] +// ============================================================================ + +void CompilerCore::align(uint32_t m) ASMJIT_NOTHROW +{ + addEmittable(Compiler_newObject<EAlign>(this, m)); +} + +// ============================================================================ +// [AsmJit::CompilerCore - Label] +// ============================================================================ + +Label CompilerCore::newLabel() ASMJIT_NOTHROW +{ + Label label; + label._base.id = (uint32_t)_targetData.getLength() | OPERAND_ID_TYPE_LABEL; + + ETarget* target = Compiler_newObject<ETarget>(this, label); + _targetData.append(target); + + return label; +} + +void CompilerCore::bind(const Label& label) ASMJIT_NOTHROW +{ + uint32_t id = label.getId() & OPERAND_ID_VALUE_MASK; + ASMJIT_ASSERT(id != INVALID_VALUE); + ASMJIT_ASSERT(id < _targetData.getLength()); + + addEmittable(_targetData[id]); +} + +// ============================================================================ +// [AsmJit::CompilerCore - Variables] +// ============================================================================ + +VarData* CompilerCore::_newVarData(const char* name, uint32_t type, uint32_t size) ASMJIT_NOTHROW +{ + VarData* vdata = reinterpret_cast<VarData*>(_zone.zalloc(sizeof(VarData))); + if (vdata == NULL) return NULL; + + char nameBuffer[32]; + if (name == NULL) + { + 
sprintf(nameBuffer, "var_%d", _varNameId); + name = nameBuffer; + _varNameId++; + } + + vdata->scope = getFunction(); + vdata->firstEmittable = NULL; + vdata->firstCallable = NULL; + vdata->lastEmittable = NULL; + + vdata->name = _zone.zstrdup(name); + vdata->id = (uint32_t)_varData.getLength() | OPERAND_ID_TYPE_VAR; + vdata->type = type; + vdata->size = size; + + vdata->homeRegisterIndex = INVALID_VALUE; + vdata->prefRegisterMask = 0; + + vdata->homeMemoryData = NULL; + + vdata->registerIndex = INVALID_VALUE; + vdata->workOffset = INVALID_VALUE; + + vdata->nextActive = NULL; + vdata->prevActive = NULL; + + vdata->priority = 10; + vdata->calculated = false; + vdata->isRegArgument = false; + vdata->isMemArgument = false; + + vdata->state = VARIABLE_STATE_UNUSED; + vdata->changed = false; + vdata->saveOnUnuse = false; + + vdata->registerReadCount = 0; + vdata->registerWriteCount = 0; + vdata->registerRWCount = 0; + + vdata->registerGPBLoCount = 0; + vdata->registerGPBHiCount = 0; + + vdata->memoryReadCount = 0; + vdata->memoryWriteCount = 0; + vdata->memoryRWCount = 0; + + vdata->tempPtr = NULL; + + _varData.append(vdata); + return vdata; +} + +GPVar CompilerCore::newGP(uint32_t variableType, const char* name) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT((variableType < _VARIABLE_TYPE_COUNT) && + (variableInfo[variableType].clazz & VariableInfo::CLASS_GP) != 0); + +#if defined(ASMJIT_X86) + if (variableInfo[variableType].size > 4) + { + variableType = VARIABLE_TYPE_GPD; + if (_logger) + { + _logger->logString("*** COMPILER WARNING: Translated QWORD variable to DWORD, FIX YOUR CODE! 
***\n"); + } + } +#endif // ASMJIT_X86 + + VarData* vdata = _newVarData(name, variableType, variableInfo[variableType].size); + return GPVarFromData(vdata); +} + +GPVar CompilerCore::argGP(uint32_t index) ASMJIT_NOTHROW +{ + GPVar var; + EFunction* f = getFunction(); + + if (f) + { + const FunctionPrototype& prototype = f->getPrototype(); + if (index < prototype.getArgumentsCount()) + { + VarData* vdata = getFunction()->_argumentVariables[index]; + + var._var.id = vdata->id; + var._var.size = vdata->size; + var._var.registerCode = variableInfo[vdata->type].code; + var._var.variableType = vdata->type; + } + } + + return var; +} + +MMVar CompilerCore::newMM(uint32_t variableType, const char* name) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT((variableType < _VARIABLE_TYPE_COUNT) && + (variableInfo[variableType].clazz & VariableInfo::CLASS_MM) != 0); + + VarData* vdata = _newVarData(name, variableType, 8); + return MMVarFromData(vdata); +} + +MMVar CompilerCore::argMM(uint32_t index) ASMJIT_NOTHROW +{ + MMVar var; + EFunction* f = getFunction(); + + if (f) + { + const FunctionPrototype& prototype = f->getPrototype(); + if (prototype.getArgumentsCount() < index) + { + VarData* vdata = getFunction()->_argumentVariables[index]; + + var._var.id = vdata->id; + var._var.size = vdata->size; + var._var.registerCode = variableInfo[vdata->type].code; + var._var.variableType = vdata->type; + } + } + + return var; +} + +XMMVar CompilerCore::newXMM(uint32_t variableType, const char* name) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT((variableType < _VARIABLE_TYPE_COUNT) && + (variableInfo[variableType].clazz & VariableInfo::CLASS_XMM) != 0); + + VarData* vdata = _newVarData(name, variableType, 16); + return XMMVarFromData(vdata); +} + +XMMVar CompilerCore::argXMM(uint32_t index) ASMJIT_NOTHROW +{ + XMMVar var; + EFunction* f = getFunction(); + + if (f) + { + const FunctionPrototype& prototype = f->getPrototype(); + if (prototype.getArgumentsCount() < index) + { + VarData* vdata = 
getFunction()->_argumentVariables[index]; + + var._var.id = vdata->id; + var._var.size = vdata->size; + var._var.registerCode = variableInfo[vdata->type].code; + var._var.variableType = vdata->type; + } + } + + return var; +} + +void CompilerCore::_vhint(BaseVar& var, uint32_t hintId, uint32_t hintValue) ASMJIT_NOTHROW +{ + if (var.getId() == INVALID_VALUE) return; + + VarData* vdata = _getVarData(var.getId()); + ASMJIT_ASSERT(vdata != NULL); + + EVariableHint* e = Compiler_newObject<EVariableHint>(this, vdata, hintId, hintValue); + addEmittable(e); +} + +void CompilerCore::alloc(BaseVar& var) ASMJIT_NOTHROW +{ + _vhint(var, VARIABLE_HINT_ALLOC, INVALID_VALUE); +} + +void CompilerCore::alloc(BaseVar& var, uint32_t regIndex) ASMJIT_NOTHROW +{ + _vhint(var, VARIABLE_HINT_ALLOC, regIndex); +} + +void CompilerCore::alloc(BaseVar& var, const BaseReg& reg) ASMJIT_NOTHROW +{ + _vhint(var, VARIABLE_HINT_ALLOC, reg.getRegIndex()); +} + +void CompilerCore::save(BaseVar& var) ASMJIT_NOTHROW +{ + _vhint(var, VARIABLE_HINT_SAVE, INVALID_VALUE); +} + +void CompilerCore::spill(BaseVar& var) ASMJIT_NOTHROW +{ + _vhint(var, VARIABLE_HINT_SPILL, INVALID_VALUE); +} + +void CompilerCore::unuse(BaseVar& var) ASMJIT_NOTHROW +{ + _vhint(var, VARIABLE_HINT_UNUSE, INVALID_VALUE); +} + +uint32_t CompilerCore::getPriority(BaseVar& var) const ASMJIT_NOTHROW +{ + if (var.getId() == INVALID_VALUE) return INVALID_VALUE; + + VarData* vdata = _getVarData(var.getId()); + ASMJIT_ASSERT(vdata != NULL); + + return vdata->priority; +} + +void CompilerCore::setPriority(BaseVar& var, uint32_t priority) ASMJIT_NOTHROW +{ + if (var.getId() == INVALID_VALUE) return; + + VarData* vdata = _getVarData(var.getId()); + ASMJIT_ASSERT(vdata != NULL); + + if (priority > 100) priority = 100; + vdata->priority = (uint8_t)priority; +} + +bool CompilerCore::getSaveOnUnuse(BaseVar& var) const ASMJIT_NOTHROW +{ + if (var.getId() == INVALID_VALUE) return false; + + VarData* vdata = _getVarData(var.getId()); + 
ASMJIT_ASSERT(vdata != NULL);

  return (bool)vdata->saveOnUnuse;
}

// Set the save-on-unuse flag: when true, the variable is written back to
// memory at the end of its scope instead of being discarded.
void CompilerCore::setSaveOnUnuse(BaseVar& var, bool value) ASMJIT_NOTHROW
{
  if (var.getId() == INVALID_VALUE) return;

  VarData* vdata = _getVarData(var.getId());
  ASMJIT_ASSERT(vdata != NULL);

  vdata->saveOnUnuse = value;
}

// Rename the variable (the name is duplicated into the compiler's zone, so
// the caller's buffer may be freed afterwards).
void CompilerCore::rename(BaseVar& var, const char* name) ASMJIT_NOTHROW
{
  if (var.getId() == INVALID_VALUE) return;

  VarData* vdata = _getVarData(var.getId());
  ASMJIT_ASSERT(vdata != NULL);

  vdata->name = _zone.zstrdup(name);
}

// ============================================================================
// [AsmJit::CompilerCore - State]
// ============================================================================

// Allocate a zero-initialized StateData with room for 'memVarsCount' extra
// pointers appended after the structure. Returns NULL on OOM.
StateData* CompilerCore::_newStateData(uint32_t memVarsCount) ASMJIT_NOTHROW
{
  StateData* state = reinterpret_cast<StateData*>(_zone.zalloc(sizeof(StateData) + memVarsCount * sizeof(void*)));
  return state;
}

// ============================================================================
// [AsmJit::CompilerCore - Make]
// ============================================================================

// Serialize the emittable stream into a fresh Assembler and produce the
// final machine code. Returns NULL if either the compiler or the assembler
// reported an error.
void* CompilerCore::make() ASMJIT_NOTHROW
{
  Assembler a(_codeGenerator);
  a._properties = _properties;
  a.setLogger(_logger);

  serialize(a);

  // Compiler errors take precedence over assembler errors.
  if (this->getError())
  {
    return NULL;
  }

  // Propagate assembler errors into the compiler error state.
  if (a.getError())
  {
    setError(a.getError());
    return NULL;
  }

  void* result = a.make();
  if (_logger)
  {
    _logger->logFormat("*** COMPILER SUCCESS - Wrote %u bytes, code: %u, trampolines: %u.\n\n",
      (unsigned int)a.getCodeSize(),
      (unsigned int)a.getOffset(),
      (unsigned int)a.getTrampolineSize());
  }
  return result;
}

// Translate the emittable stream into assembler instructions, one function
// at a time. Non-function emittables between functions are emitted directly.
// NOTE: statement order here is load-bearing (_cc toggling, _current
// rewinding, the back-code re-scan) - do not reorder.
void CompilerCore::serialize(Assembler& a) ASMJIT_NOTHROW
{
  // Context.
  CompilerContext cc(reinterpret_cast<Compiler*>(this));

  Emittable* start = _first;
  Emittable* stop = NULL;

  // Register all labels.
  a.registerLabels(_targetData.getLength());

  // Make code.
  for (;;)
  {
    // No active code-generation context while scanning for the next function.
    _cc = NULL;

    // ------------------------------------------------------------------------
    // Find a function. Everything before it is emitted as-is; returning here
    // is the normal loop exit once the stream is exhausted.
    for (;;)
    {
      if (start == NULL) return;
      if (start->getType() == EMITTABLE_FUNCTION)
        break;
      else
        start->emit(a);

      start = start->getNext();
    }
    // ------------------------------------------------------------------------

    // ------------------------------------------------------------------------
    // Setup code generation context.
    Emittable* cur;

    cc._function = reinterpret_cast<EFunction*>(start);
    cc._start = start;
    cc._stop = stop = cc._function->getEnd();
    cc._extraBlock = stop->getPrev();

    // Detect whether the function generation was finished.
    if (!cc._function->_finished || cc._function->getEnd()->getPrev() == NULL)
    {
      setError(ERROR_INCOMPLETE_FUNCTION);
      return;
    }
    // ------------------------------------------------------------------------

    // ------------------------------------------------------------------------
    // Step 1:
    // - Assign/increment offset to each emittable.
    // - Extract variables from instructions.
    // - Prepare variables for register allocator:
    //   - Update read(r) / write(w) / overwrite(x) statistics.
    //   - Update register / memory usage statistics.
    //   - Find scope (first / last emittable) of variables.
    for (cur = start; ; cur = cur->getNext())
    {
      cur->prepare(cc);
      if (cur == stop) break;
    }
    // ------------------------------------------------------------------------

    // We set compiler context also to Compiler so new emitted instructions
    // can call prepare() to itself.
    _cc = &cc;

    // ------------------------------------------------------------------------
    // Step 2:
    // - Translate special instructions (imul, cmpxchg8b, ...).
    // - Alloc registers.
    // - Translate forward jumps.
    // - Alloc memory operands (variables related).
    // - Emit function prolog.
    // - Emit function epilog.
    // - Patch memory operands (variables related).
    // - Dump function prototype and variable statistics (if enabled).

    // Translate special instructions and run alloc registers.
    cur = start;

    do {
      do {
        // Assign current offset for each emittable back to CompilerContext.
        cc._currentOffset = cur->_offset;
        // Assign previous emittable to compiler so each variable spill/alloc will
        // be emitted before.
        _current = cur->getPrev();

        cur = cur->translate(cc);
      } while (cur);

      // translate() returned NULL - the path ended (jump/ret); anything that
      // follows without a label is unreachable until a back-code entry is found.
      cc._unrecheable = true;

      // Resume translation at the first not-yet-translated back-code target.
      sysuint_t len = cc._backCode.getLength();
      while (cc._backPos < len)
      {
        cur = cc._backCode[cc._backPos++]->getNext();
        if (!cur->isTranslated()) break;

        cur = NULL;
      }
    } while (cur);

    // Translate forward jumps.
    {
      ForwardJumpData* j = cc._forwardJumps;
      while (j)
      {
        cc._assignState(j->state);
        _current = j->inst->getPrev();
        j->inst->_doJump(cc);
        j = j->next;
      }
    }

    // Alloc memory operands (variables related).
    cc._allocMemoryOperands();

    // Emit function prolog / epilog.
    cc._function->_preparePrologEpilog(cc);

    _current = cc._function->_prolog;
    cc._function->_emitProlog(cc);

    _current = cc._function->_epilog;
    cc._function->_emitEpilog(cc);

    // Patch memory operands (variables related).
    _current = _last;
    cc._patchMemoryOperands(start, stop);

    // Dump function prototype and variable statistics (if enabled).
    if (_logger)
    {
      cc._function->_dumpFunction(cc);
    }
    // ------------------------------------------------------------------------

    // ------------------------------------------------------------------------
    // Hack: need to register labels that was created by the Step 2.
    if (a._labelData.getLength() < _targetData.getLength())
    {
      a.registerLabels(_targetData.getLength() - a._labelData.getLength());
    }

    Emittable* extraBlock = cc._extraBlock;

    // Step 3:
    // - Emit instructions to Assembler stream.
+ for (cur = start; ; cur = cur->getNext()) + { + cur->emit(a); + if (cur == extraBlock) break; + } + // ------------------------------------------------------------------------ + + // ------------------------------------------------------------------------ + // Step 4: + // - Emit everything else (post action). + for (cur = start; ; cur = cur->getNext()) + { + cur->post(a); + if (cur == extraBlock) break; + } + // ------------------------------------------------------------------------ + + start = extraBlock->getNext(); + cc._clear(); + } +} + +// ============================================================================ +// [AsmJit::Compiler - Construction / Destruction] +// ============================================================================ + +Compiler::Compiler(CodeGenerator* codeGenerator) ASMJIT_NOTHROW : + CompilerIntrinsics(codeGenerator) +{ +} + +Compiler::~Compiler() ASMJIT_NOTHROW +{ +} + +} // AsmJit namespace + +// [Api-End] +#include "ApiEnd.h" diff --git a/lib/AsmJit/CompilerX86X64.h b/lib/AsmJit/CompilerX86X64.h new file mode 100644 index 0000000..ceb228d --- /dev/null +++ b/lib/AsmJit/CompilerX86X64.h @@ -0,0 +1,8891 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Guard] +#ifndef _ASMJIT_COMPILERX86X64_H +#define _ASMJIT_COMPILERX86X64_H + +#if !defined(_ASMJIT_COMPILER_H) +#warning "AsmJit/CompilerX86X64.h can be only included by AsmJit/Compiler.h" +#endif // _ASMJIT_COMPILER_H + +// [Dependencies] +#include "Build.h" +#include "Assembler.h" +#include "Defs.h" +#include "Operand.h" +#include "Util.h" + +#include <string.h> + +// A little bit C++. +#include <new> + +// [Api-Begin] +#include "ApiBegin.h" + +//! @internal +//! +//! @brief Mark methods not supported by @ref Compiler. These methods are +//! usually used only in function prologs/epilogs or to manage stack. +#define ASMJIT_NOT_SUPPORTED_BY_COMPILER 0 + +namespace AsmJit { + +//! @addtogroup AsmJit_Compiler +//! @{ + +// ============================================================================ +// [Forward Declarations] +// ============================================================================ + +struct CodeGenerator; + +// ============================================================================ +// [AsmJit::TypeToId] +// ============================================================================ + +// Skip documenting this. 
+#if !defined(ASMJIT_NODOC) + +ASMJIT_DECLARE_TYPE_AS_ID(int8_t, VARIABLE_TYPE_GPD); +ASMJIT_DECLARE_TYPE_AS_ID(uint8_t, VARIABLE_TYPE_GPD); + +ASMJIT_DECLARE_TYPE_AS_ID(int16_t, VARIABLE_TYPE_GPD); +ASMJIT_DECLARE_TYPE_AS_ID(uint16_t, VARIABLE_TYPE_GPD); + +ASMJIT_DECLARE_TYPE_AS_ID(int32_t, VARIABLE_TYPE_GPD); +ASMJIT_DECLARE_TYPE_AS_ID(uint32_t, VARIABLE_TYPE_GPD); + +#if defined(ASMJIT_X64) +ASMJIT_DECLARE_TYPE_AS_ID(int64_t, VARIABLE_TYPE_GPQ); +ASMJIT_DECLARE_TYPE_AS_ID(uint64_t, VARIABLE_TYPE_GPQ); +#endif // ASMJIT_X64 + +ASMJIT_DECLARE_TYPE_AS_ID(float, VARIABLE_TYPE_FLOAT); +ASMJIT_DECLARE_TYPE_AS_ID(double, VARIABLE_TYPE_DOUBLE); + +#endif // !ASMJIT_NODOC + +// ============================================================================ +// [AsmJit::FunctionPrototype] +// ============================================================================ + +//! @brief Calling convention and function argument handling. +struct ASMJIT_API FunctionPrototype +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a new @ref FunctionPrototype instance. + FunctionPrototype() ASMJIT_NOTHROW; + //! @brief Destroy the @ref FunctionPrototype instance. + ~FunctionPrototype() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Argument] + // -------------------------------------------------------------------------- + + //! @brief Function argument location. + struct Argument + { + //! @brief Variable type, see @c VARIABLE_TYPE. + uint32_t variableType; + //! @brief Register index if argument is passed through register, otherwise + //! @c INVALID_VALUE. + uint32_t registerIndex; + //! @brief Stack offset if argument is passed through stack, otherwise + //! @c INVALID_VALUE. + int32_t stackOffset; + + //! 
@brief Get whether the argument is assigned, for private use only. + inline bool isAssigned() const ASMJIT_NOTHROW + { return registerIndex != INVALID_VALUE || stackOffset != (int32_t)INVALID_VALUE; } + }; + + // -------------------------------------------------------------------------- + // [Methods] + // -------------------------------------------------------------------------- + + //! @brief Set function prototype. + //! + //! This will set function calling convention and setup arguments variables. + //! + //! @note This function will allocate variables, it can be called only once. + void setPrototype( + uint32_t callingConvention, + const uint32_t* arguments, + uint32_t argumentsCount, + uint32_t returnValue) ASMJIT_NOTHROW; + + //! @brief Get function calling convention, see @c CALL_CONV. + inline uint32_t getCallingConvention() const ASMJIT_NOTHROW { return _callingConvention; } + + //! @brief Get whether the callee pops the stack. + inline uint32_t getCalleePopsStack() const ASMJIT_NOTHROW { return _calleePopsStack; } + + //! @brief Get function arguments. + inline Argument* getArguments() ASMJIT_NOTHROW { return _arguments; } + //! @brief Get function arguments (const version). + inline const Argument* getArguments() const ASMJIT_NOTHROW { return _arguments; } + + //! @brief Get count of arguments. + inline uint32_t getArgumentsCount() const ASMJIT_NOTHROW { return _argumentsCount; } + + //! @brief Get function return value or @ref INVALID_VALUE if it's void. + inline uint32_t getReturnValue() const ASMJIT_NOTHROW { return _returnValue; } + + //! @brief Get direction of arguments passed on the stack. + //! + //! Direction should be always @c ARGUMENT_DIR_RIGHT_TO_LEFT. + //! + //! @note This is related to used calling convention, it's not affected by + //! number of function arguments or their types. + inline uint32_t getArgumentsDirection() const ASMJIT_NOTHROW { return _argumentsDirection; } + + //! 
@brief Get stack size needed for function arguments passed on the stack. + inline uint32_t getArgumentsStackSize() const ASMJIT_NOTHROW { return _argumentsStackSize; } + + //! @brief Get registers used to pass first integer parameters by current + //! calling convention. + //! + //! @note This is related to used calling convention, it's not affected by + //! number of function arguments or their types. + inline const uint32_t* getArgumentsGPList() const ASMJIT_NOTHROW { return _argumentsGPList; } + + //! @brief Get registers used to pass first SP-FP or DP-FPparameters by + //! current calling convention. + //! + //! @note This is related to used calling convention, it's not affected by + //! number of function arguments or their types. + inline const uint32_t* getArgumentsXMMList() const ASMJIT_NOTHROW { return _argumentsXMMList; } + + //! @brief Get bitmask of GP registers which might be used for arguments. + inline uint32_t getArgumentsGP() const ASMJIT_NOTHROW { return _argumentsGP; } + //! @brief Get bitmask of MM registers which might be used for arguments. + inline uint32_t getArgumentsMM() const ASMJIT_NOTHROW { return _argumentsMM; } + //! @brief Get bitmask of XMM registers which might be used for arguments. + inline uint32_t getArgumentsXMM() const ASMJIT_NOTHROW { return _argumentsXMM; } + + //! @brief Get bitmask of general purpose registers that's preserved + //! (non-volatile). + //! + //! @note This is related to used calling convention, it's not affected by + //! number of function arguments or their types. + inline uint32_t getPreservedGP() const ASMJIT_NOTHROW { return _preservedGP; } + + //! @brief Get bitmask of MM registers that's preserved (non-volatile). + //! + //! @note No standardized calling function is not preserving MM registers. + //! This member is here for extension writers who need for some reason custom + //! calling convention that can be called through code generated by AsmJit + //! (or other runtime code generator). 
+ inline uint32_t getPreservedMM() const ASMJIT_NOTHROW { return _preservedMM; } + + //! @brief Return bitmask of XMM registers that's preserved (non-volatile). + //! + //! @note This is related to used calling convention, it's not affected by + //! number of function arguments or their types. + inline uint32_t getPreservedXMM() const ASMJIT_NOTHROW { return _preservedXMM; } + + //! @brief Get mask of all GP registers used to pass function arguments. + inline uint32_t getPassedGP() const ASMJIT_NOTHROW { return _passedGP; } + //! @brief Get mask of all MM registers used to pass function arguments. + inline uint32_t getPassedMM() const ASMJIT_NOTHROW { return _passedMM; } + //! @brief Get mask of all XMM registers used to pass function arguments. + inline uint32_t getPassedXMM() const ASMJIT_NOTHROW { return _passedXMM; } + + //! @brief Find argument (id) by the register code. Used mainly by @ref ECall + //! emittable. + uint32_t findArgumentByRegisterCode(uint32_t regCode) const ASMJIT_NOTHROW; + +protected: + + // -------------------------------------------------------------------------- + // [Private] + // -------------------------------------------------------------------------- + + void _clear() ASMJIT_NOTHROW; + void _setCallingConvention(uint32_t callingConvention) ASMJIT_NOTHROW; + void _setPrototype( + const uint32_t* arguments, + uint32_t argumentsCount, + uint32_t returnValue) ASMJIT_NOTHROW; + void _setReturnValue(uint32_t valueId) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Calling convention. + uint32_t _callingConvention; + //! @brief Whether callee pops stack. + uint32_t _calleePopsStack; + + //! @brief List of arguments, their register codes or stack locations. + Argument _arguments[FUNC_MAX_ARGS]; + + //! @brief Function return value. + uint32_t _returnValue; + + //! 
@brief Count of arguments (in @c _argumentsList). + uint32_t _argumentsCount; + //! @brief Direction for arguments passed on the stack, see @c ARGUMENT_DIR. + uint32_t _argumentsDirection; + //! @brief Count of bytes consumed by arguments on the stack. + uint32_t _argumentsStackSize; + + //! @brief List of registers that's used for first GP arguments. + uint32_t _argumentsGPList[16]; + //! @brief List of registers that's used for first XMM arguments. + uint32_t _argumentsXMMList[16]; + + //! @brief Bitmask for preserved GP registers. + uint32_t _argumentsGP; + //! @brief Bitmask for preserved MM registers. + uint32_t _argumentsMM; + //! @brief Bitmask for preserved XMM registers. + uint32_t _argumentsXMM; + + //! @brief Bitmask for preserved GP registers. + uint32_t _preservedGP; + //! @brief Bitmask for preserved MM registers. + uint32_t _preservedMM; + //! @brief Bitmask for preserved XMM registers. + uint32_t _preservedXMM; + + // Set by _setPrototype(). + + //! @brief Bitmask for GP registers used as function arguments. + uint32_t _passedGP; + //! @brief Bitmask for GP registers used as function arguments. + uint32_t _passedMM; + //! @brief Bitmask for GP registers used as function arguments. + uint32_t _passedXMM; +}; + +// ============================================================================ +// [AsmJit::VarData] +// ============================================================================ + +//! @brief Variable data (used internally by @c Compiler). +struct VarData +{ + // -------------------------------------------------------------------------- + // [Scope] + // -------------------------------------------------------------------------- + + //! @brief Scope (NULL if variable is global). + EFunction* scope; + + //! @brief The first emittable where the variable is accessed. + //! + //! @note If this member is @c NULL then variable is unused. + Emittable* firstEmittable; + //! @brief The first callable (ECall) which is after the @c firstEmittable. 
+ ECall* firstCallable; + //! @brief The last emittable where the variable is accessed. + Emittable* lastEmittable; + + // -------------------------------------------------------------------------- + // [Id / Name] + // -------------------------------------------------------------------------- + + //! @brief Variable name. + const char* name; + //! @brief Variable id. + uint32_t id; + //! @brief Variable type. + uint32_t type; + //! @brief Variable size. + uint32_t size; + + // -------------------------------------------------------------------------- + // [Home] + // -------------------------------------------------------------------------- + + //! @brief Home register index or @c INVALID_VALUE (used by register allocator). + uint32_t homeRegisterIndex; + //! @brief Preferred register index. + uint32_t prefRegisterMask; + + //! @brief Home memory address offset. + int32_t homeMemoryOffset; + //! @brief Used by @c CompilerContext, do not touch (NULL when created). + void* homeMemoryData; + + // -------------------------------------------------------------------------- + // [Actual] + // -------------------------------------------------------------------------- + + //! @brief Actual register index (connected with actual @c StateData). + uint32_t registerIndex; + //! @brief Actual working offset. This member is set before register allocator + //! is called. If workOffset is same as CompilerContext::_currentOffset then + //! this variable is probably used in next instruction and can't be spilled. + uint32_t workOffset; + + //! @brief Next active variable in circullar double-linked list. + VarData* nextActive; + //! @brief Previous active variable in circullar double-linked list. + VarData* prevActive; + + // -------------------------------------------------------------------------- + // [Flags] + // -------------------------------------------------------------------------- + + //! @brief Variable priority. + uint8_t priority; + //! 
@brief Whether variable content can be calculated by simple instruction + //! + //! This is used mainly by mmx or sse2 code and variable allocator will + //! never reserve space for this variable. Calculated variables are for + //! example all zeros, all ones, etc. + uint8_t calculated; + //! @brief Whether variable is argument passed through register. + uint8_t isRegArgument; + //! @brief Whether variable is argument passed through memory. + uint8_t isMemArgument; + + //! @brief Variable state (connected with actual @c StateData). + uint8_t state; + //! @brief Whether variable was changed (connected with actual @c StateData). + uint8_t changed; + //! @brief Save on unuse (at end of the variable scope). + uint8_t saveOnUnuse; + + // -------------------------------------------------------------------------- + // [Statistics] + // -------------------------------------------------------------------------- + + //! @brief Register read statistics (used by instructions where this variable needs + //! to be read only). + uint32_t registerReadCount; + //! @brief Register write statistics (used by instructions where this variable needs + //! to be write only). + uint32_t registerWriteCount; + //! @brief Register read+write statistics (used by instructions where this variable + //! needs to be read and write). + uint32_t registerRWCount; + + //! @brief Register GPB.LO statistics (for code generator). + uint32_t registerGPBLoCount; + //! @brief Register GPB.HI statistics (for code generator). + uint32_t registerGPBHiCount; + + //! @brief Memory read statistics. + uint32_t memoryReadCount; + //! @brief Memory write statistics. + uint32_t memoryWriteCount; + //! @brief Memory read+write statistics. + uint32_t memoryRWCount; + + // -------------------------------------------------------------------------- + // [Temporary] + // -------------------------------------------------------------------------- + + //! @brief Temporary data that can be used in prepare/translate stage. 
+ //! + //! Initial value is NULL and each emittable/code that will use it must also + //! clear it. + //! + //! This temporary data is designed to be used by algorithms that need to + //! set some state into the variables, do something and then cleanup. See + //! state-switch and function call. + union + { + void* tempPtr; + sysint_t tempInt; + }; +}; + +// ============================================================================ +// [AsmJit::VarMemBlock] +// ============================================================================ + +struct VarMemBlock +{ + int32_t offset; + uint32_t size; + + VarMemBlock* nextUsed; + VarMemBlock* nextFree; +}; + +// ============================================================================ +// [AsmJit::VarAllocRecord] +// ============================================================================ + +//! @brief Variable alloc record (for each instruction that uses variables). +//! +//! Variable record contains pointer to variable data and register allocation +//! flags. These flags are important to determine the best alloc instruction. +struct VarAllocRecord +{ + //! @brief Variable data (the structure owned by @c Compiler). + VarData* vdata; + //! @brief Variable alloc flags, see @c VARIABLE_ALLOC. + uint32_t vflags; + //! @brief Register mask (default is 0). + uint32_t regMask; +}; + +// ============================================================================ +// [AsmJit::VarCallRecord] +// ============================================================================ + +//! @brief Variable call-fn record (for each callable that uses variables). +//! +//! This record contains variables that are used to call a function (using +//! @c ECall emittable). Each variable contains the registers where it must +//! be and registers where the value will be returned. +struct VarCallRecord +{ + //! @brief Variable data (the structure owned by @c Compiler). 
+ VarData* vdata; + uint32_t flags; + + uint8_t inCount; + uint8_t inDone; + + uint8_t outCount; + uint8_t outDone; + + enum FLAGS + { + FLAG_IN_GP = 0x0001, + FLAG_IN_MM = 0x0002, + FLAG_IN_XMM = 0x0004, + FLAG_IN_STACK = 0x0008, + + FLAG_OUT_EAX = 0x0010, + FLAG_OUT_EDX = 0x0020, + FLAG_OUT_ST0 = 0x0040, + FLAG_OUT_ST1 = 0x0080, + FLAG_OUT_MM0 = 0x0100, + FLAG_OUT_XMM0 = 0x0400, + FLAG_OUT_XMM1 = 0x0800, + + FLAG_IN_MEM_PTR = 0x1000, + FLAG_CALL_OPERAND_REG = 0x2000, + FLAG_CALL_OPERAND_MEM = 0x4000, + + FLAG_UNUSE_AFTER_USE = 0x8000 + }; +}; + +// ============================================================================ +// [AsmJit::VarHintRecord] +// ============================================================================ + +struct VarHintRecord +{ + VarData* vdata; + uint32_t hint; +}; + +// ============================================================================ +// [AsmJit::StateData] +// ============================================================================ + +//! @brief State data. +struct StateData +{ + enum { NUM_REGS = 16 + 8 + 16 }; + + inline void clear() ASMJIT_NOTHROW + { + memset(this, 0, sizeof(*this)); + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + union + { + //! @brief All allocated variables in one array. + VarData* regs[NUM_REGS]; + + struct + { + //! @brief Allocated GP registers. + VarData* gp[16]; + //! @brief Allocated MM registers. + VarData* mm[8]; + //! @brief Allocated XMM registers. + VarData* xmm[16]; + }; + }; + + //! @brief Used GP registers bitmask. + uint32_t usedGP; + //! @brief Used MM registers bitmask. + uint32_t usedMM; + //! @brief Used XMM registers bitmask. + uint32_t usedXMM; + + //! @brief Changed GP registers bitmask. + uint32_t changedGP; + //! @brief Changed MM registers bitmask. + uint32_t changedMM; + //! @brief Changed XMM registers bitmask. 
+ uint32_t changedXMM; + + //! @brief Count of variables in @c memVarsData. + uint32_t memVarsCount; + //! @brief Variables stored in memory (@c VARIABLE_STATE_MEMORY). + //! + //! When saving / restoring state it's important to keep registers which are + //! still in memory. Register is always unused when it is going out-of-scope. + //! All variables which are not here are unused (@c VARIABLE_STATE_UNUSED). + VarData* memVarsData[1]; +}; + +// ============================================================================ +// [AsmJit::ForwardJumpData] +// ============================================================================ + +struct ForwardJumpData +{ + EJmp* inst; + StateData* state; + ForwardJumpData* next; +}; + +// ============================================================================ +// [AsmJit::EVariableHint] +// ============================================================================ + +//! @brief Variable hint. +struct ASMJIT_API EVariableHint : public Emittable +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a new @ref EVariableHint instance. + EVariableHint(Compiler* c, VarData* vdata, uint32_t hintId, uint32_t hintValue) ASMJIT_NOTHROW; + //! @brief Destroy the @ref EVariableHInt instance. 
+ virtual ~EVariableHint() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Emit] + // -------------------------------------------------------------------------- + + virtual void prepare(CompilerContext& cc) ASMJIT_NOTHROW; + virtual Emittable* translate(CompilerContext& cc) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Utilities] + // -------------------------------------------------------------------------- + + virtual int getMaxSize() const ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Hint] + // -------------------------------------------------------------------------- + + //! @brief Get assigned variable (data). + inline VarData* getVar() const ASMJIT_NOTHROW { return _vdata; } + + //! @brief Get hint it (see @ref VARIABLE_HINT). + inline uint32_t getHintId() const ASMJIT_NOTHROW { return _hintId; } + //! @brief Get hint value. + inline uint32_t getHintValue() const ASMJIT_NOTHROW { return _hintValue; } + + //! @brief Set hint it (see @ref VARIABLE_HINT). + inline void setHintId(uint32_t hintId) ASMJIT_NOTHROW { _hintId = hintId; } + //! @brief Set hint value. + inline void setHintValue(uint32_t hintValue) ASMJIT_NOTHROW { _hintValue = hintValue; } + + VarData* _vdata; + uint32_t _hintId; + uint32_t _hintValue; +}; + +// ============================================================================ +// [AsmJit::EInstruction] +// ============================================================================ + +//! @brief Emittable that represents single instruction and its operands. +struct ASMJIT_API EInstruction : public Emittable +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a new @ref EInstruction instance. 
+ EInstruction(Compiler* c, uint32_t code, Operand* operandsData, uint32_t operandsCount) ASMJIT_NOTHROW; + //! @brief Destroy the @ref EInstruction instance. + virtual ~EInstruction() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Emit] + // -------------------------------------------------------------------------- + + virtual void prepare(CompilerContext& cc) ASMJIT_NOTHROW; + virtual Emittable* translate(CompilerContext& cc) ASMJIT_NOTHROW; + + virtual void emit(Assembler& a) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Utilities] + // -------------------------------------------------------------------------- + + virtual int getMaxSize() const ASMJIT_NOTHROW; + virtual bool _tryUnuseVar(VarData* v) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Instruction Code] + // -------------------------------------------------------------------------- + + //! @brief Get whether the instruction is special. + inline bool isSpecial() const ASMJIT_NOTHROW { return _isSpecial; } + + //! @brief Get whether the instruction is FPU. + inline bool isFPU() const ASMJIT_NOTHROW { return _isFPU; } + + //! @brief Get instruction code, see @c INST_CODE. + inline uint32_t getCode() const ASMJIT_NOTHROW { return _code; } + + //! @brief Set instruction code to @a code. + //! + //! Please do not modify instruction code if you are not know what you are + //! doing. Incorrect instruction code or operands can raise assertion() at + //! runtime. + inline void setCode(uint32_t code) ASMJIT_NOTHROW { _code = code; } + + // -------------------------------------------------------------------------- + // [Operands] + // -------------------------------------------------------------------------- + + //! @brief Get count of operands in operands array (number between 0 to 2 inclusive). 
+ inline uint32_t getOperandsCount() const ASMJIT_NOTHROW { return _operandsCount; } + + //! @brief Get operands array (3 operands total). + inline Operand* getOperands() ASMJIT_NOTHROW { return _operands; } + //! @brief Get operands array (3 operands total). + inline const Operand* getOperands() const ASMJIT_NOTHROW { return _operands; } + + //! @brief Get memory operand. + inline Mem* getMemOp() ASMJIT_NOTHROW { return _memOp; } + //! @brief Set memory operand. + inline void setMemOp(Mem* op) ASMJIT_NOTHROW { _memOp = op; } + + // -------------------------------------------------------------------------- + // [Variables] + // -------------------------------------------------------------------------- + + //! @brief Get count of variables in instruction operands (and in variables array). + inline uint32_t getVariablesCount() const ASMJIT_NOTHROW { return _variablesCount; } + + //! @brief Get operands array (3 operands total). + inline VarAllocRecord* getVariables() ASMJIT_NOTHROW { return _variables; } + //! @brief Get operands array (3 operands total). + inline const VarAllocRecord* getVariables() const ASMJIT_NOTHROW { return _variables; } + + // -------------------------------------------------------------------------- + // [Jump] + // -------------------------------------------------------------------------- + + //! @brief Get possible jump target. + //! + //! If this instruction is conditional or normal jump then return value is + //! label location (ETarget instance), otherwise return value is @c NULL. + virtual ETarget* getJumpTarget() const ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +protected: + //! @brief Instruction code, see @c INST_CODE. + uint32_t _code; + + //! @brief Emit options, see @c EMIT_OPTIONS. + uint32_t _emitOptions; + + //! @brief Operands count. + uint32_t _operandsCount; + + //! 
@brief Variables count. + uint32_t _variablesCount; + + //! @brief Operands. + Operand* _operands; + //! @brief Memory operand (if instruction contains any). + Mem* _memOp; + + //! @brief Variables (extracted from operands). + VarAllocRecord* _variables; + + //! @brief Whether the instruction is special. + bool _isSpecial; + //! @brief Whether the instruction is FPU. + bool _isFPU; + + //! @brief Whether the one of the operands is GPB.Lo register. + bool _isGPBLoUsed; + //! @brief Whether the one of the operands is GPB.Hi register. + bool _isGPBHiUsed; + + friend struct EFunction; + friend struct CompilerContext; + friend struct CompilerCore; + +private: + ASMJIT_DISABLE_COPY(EInstruction) +}; + +// ============================================================================ +// [AsmJit::EJmp] +// ============================================================================ + +//! @brief Emittable that represents single instruction that can jump somewhere. +struct ASMJIT_API EJmp : public EInstruction +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + EJmp(Compiler* c, uint32_t code, Operand* operandsData, uint32_t operandsCount) ASMJIT_NOTHROW; + virtual ~EJmp() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Emit] + // -------------------------------------------------------------------------- + + virtual void prepare(CompilerContext& cc) ASMJIT_NOTHROW; + virtual Emittable* translate(CompilerContext& cc) ASMJIT_NOTHROW; + virtual void emit(Assembler& a) ASMJIT_NOTHROW; + + void _doJump(CompilerContext& cc) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Jump] + // -------------------------------------------------------------------------- + + virtual ETarget* getJumpTarget() const ASMJIT_NOTHROW; + + inline 
EJmp* getJumpNext() const ASMJIT_NOTHROW { return _jumpNext; } + inline bool isTaken() const ASMJIT_NOTHROW { return _isTaken; } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +protected: + ETarget* _jumpTarget; + EJmp *_jumpNext; + StateData* _state; + bool _isTaken; + + friend struct EFunction; + friend struct CompilerContext; + friend struct CompilerCore; + +private: + ASMJIT_DISABLE_COPY(EJmp) +}; + +// ============================================================================ +// [AsmJit::EFunction] +// ============================================================================ + +//! @brief Function emittable used to generate C/C++ functions. +//! +//! Functions are base blocks for generating assembler output. Each generated +//! assembler stream needs standard entry and leave sequences thats compatible +//! to the operating system conventions - Application Binary Interface (ABI). +//! +//! Function class can be used to generate entry (prolog) and leave (epilog) +//! sequences that is compatible to a given calling convention and to allocate +//! and manage variables that can be allocated to registers or spilled. +//! +//! @note To create function use @c AsmJit::Compiler::newFunction() method, do +//! not create @c EFunction instances using other ways. +//! +//! @sa @c State, @c Var. +struct ASMJIT_API EFunction : public Emittable +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create new @c Function instance. + //! + //! @note Always use @c AsmJit::Compiler::newFunction() to create @c Function + //! instance. + EFunction(Compiler* c) ASMJIT_NOTHROW; + //! @brief Destroy @c Function instance. 
+ virtual ~EFunction() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Emit] + // -------------------------------------------------------------------------- + + virtual void prepare(CompilerContext& cc) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Utilities] + // -------------------------------------------------------------------------- + + virtual int getMaxSize() const ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Function Prototype (Calling Convention + Arguments) / Return Value] + // -------------------------------------------------------------------------- + + inline const FunctionPrototype& getPrototype() const ASMJIT_NOTHROW { return _functionPrototype; } + inline uint32_t getHint(uint32_t hint) ASMJIT_NOTHROW { return _hints[hint]; } + + void setPrototype( + uint32_t callingConvention, + const uint32_t* arguments, + uint32_t argumentsCount, + uint32_t returnValue) ASMJIT_NOTHROW; + void setHint(uint32_t hint, uint32_t value) ASMJIT_NOTHROW; + + inline EProlog* getProlog() const ASMJIT_NOTHROW { return _prolog; } + inline EEpilog* getEpilog() const ASMJIT_NOTHROW { return _epilog; } + + inline EFunctionEnd* getEnd() const ASMJIT_NOTHROW { return _end; } + + //! @brief Create variables from FunctionPrototype declaration. This is just + //! parsing what FunctionPrototype generated for current function calling + //! convention and arguments. + void _createVariables() ASMJIT_NOTHROW; + + //! @brief Prepare variables (ids, names, scope, registers). + void _prepareVariables(Emittable* first) ASMJIT_NOTHROW; + + //! @brief Allocate variables (setting correct state, changing masks, etc). 
+ void _allocVariables(CompilerContext& cc) ASMJIT_NOTHROW; + + void _preparePrologEpilog(CompilerContext& cc) ASMJIT_NOTHROW; + void _dumpFunction(CompilerContext& cc) ASMJIT_NOTHROW; + void _emitProlog(CompilerContext& cc) ASMJIT_NOTHROW; + void _emitEpilog(CompilerContext& cc) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Function-Call] + // -------------------------------------------------------------------------- + + //! @brief Reserve stack for calling other function and mark function as + //! callee. + void reserveStackForFunctionCall(int32_t size); + + // -------------------------------------------------------------------------- + // [Labels] + // -------------------------------------------------------------------------- + + //! @brief Get function entry label. + //! + //! Entry label can be used to call this function from another code that's + //! being generated. + inline const Label& getEntryLabel() const ASMJIT_NOTHROW { return _entryLabel; } + + //! @brief Get function exit label. + //! + //! Use exit label to jump to function epilog. + inline const Label& getExitLabel() const ASMJIT_NOTHROW { return _exitLabel; } + + // -------------------------------------------------------------------------- + // [Misc] + // -------------------------------------------------------------------------- + + //! @brief Set the _isEspAdjusted member to true. + //! + //! This method is used to tell compiler that the ESP/RSP must be adjusted in + //! function prolog/epilog, because the stack is manipulated (usually caused + //! by the function call, see @c ECall). + inline void mustAdjustEsp() { _isEspAdjusted = true; } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +protected: + //! @brief Function prototype. + FunctionPrototype _functionPrototype; + //! 
@brief Function arguments (variable IDs). + VarData** _argumentVariables; + //! @brief Function hints. + uint32_t _hints[16]; + + //! @brief Whether the function stack is aligned by 16-bytes by OS. + //! + //! This is always true for 64-bit mode and for linux. + bool _isStackAlignedByOsTo16Bytes; + + //! @brief Whether the function stack (for variables) is aligned manually + //! by function to 16-bytes. + //! + //! This makes sense only if _isStackAlignedByOsTo16Bytes is false and MOVDQA + //! instruction or other SSE/SSE2 instructions are used to work with variable + //! stored on the stack. + //! + //! Value is determined automatically by these factors, expectations are: + //! + //! 1. There is 16-byte wide variable which address was used (alloc, spill, + //! op). + //! 2. Function can't be naked. + bool _isStackAlignedByFnTo16Bytes; + + //! @brief Whether the function is using naked prolog / epilog + //! + //! Naked prolog / epilog means to omit saving and restoring EBP. + bool _isNaked; + + //! @brief Whether the ESP register is adjusted by the stack size needed + //! to save registers and function variables. + //! + //! Esp is adjusted by 'sub' instruction in prolog and by add function in + //! epilog (only if function is not naked). + bool _isEspAdjusted; + + //! @brief Whether another function is called from this function. + //! + //! If another function is called from this function, it's needed to prepare + //! stack for it. If this member is true then it's likely that true will be + //! also @c _isEspAdjusted one. + bool _isCaller; + + //! @brief Whether to emit prolog / epilog sequence using push & pop + //! instructions (the default). + bool _pePushPop; + + //! @brief Whether to emit EMMS instruction in epilog (auto-detected). + bool _emitEMMS; + + //! @brief Whether to emit SFence instruction in epilog (auto-detected). + //! + //! @note Combination of @c _emitSFence and @c _emitLFence will result in + //! emitting mfence. + bool _emitSFence; + + //! 
@brief Whether to emit LFence instruction in epilog (auto-detected). + //! + //! @note Combination of @c _emitSFence and @c _emitLFence will result in + //! emitting mfence. + bool _emitLFence; + + //! @brief Whether the function is finished using @c Compiler::endFunction(). + bool _finished; + + //! @brief Bitfield containing modified and preserved GP registers. + uint32_t _modifiedAndPreservedGP; + + //! @brief Bitfield containing modified and preserved MM registers. + uint32_t _modifiedAndPreservedMM; + + //! @brief Bitfield containing modified and preserved XMM registers. + uint32_t _modifiedAndPreservedXMM; + + //! @brief ID mov movdqa instruction (@c INST_MOVDQA or @c INST_MOVDQU). + //! + //! The value is based on stack alignment. If it's guaranteed that stack + //! is aligned to 16-bytes then @c INST_MOVDQA instruction is used, otherwise + //! the @c INST_MOVDQU instruction is used for 16-byte mov. + uint32_t _movDqaInstruction; + + //! @brief Prolog / epilog stack size for PUSH/POP sequences. + int32_t _pePushPopStackSize; + //! @brief Prolog / epilog stack size for MOV sequences. + int32_t _peMovStackSize; + //! @brief Prolog / epilog stack adjust size (to make it 16-byte aligned). + int32_t _peAdjustStackSize; + + //! @brief Memory stack size (for all variables and temporary memory). + int32_t _memStackSize; + //! @brief Like @c _memStackSize, but aligned to 16-bytes. + int32_t _memStackSize16; + + //! @brief Stack size needed to call other functions. + int32_t _functionCallStackSize; + + //! @brief Function entry label. + Label _entryLabel; + //! @brief Function exit label. + Label _exitLabel; + + //! @brief Function prolog emittable. + EProlog* _prolog; + //! @brief Function epilog emittable. + EEpilog* _epilog; + //! @brief Dummy emittable, signalizes end of function. 
+ EFunctionEnd* _end; + +private: + friend struct CompilerContext; + friend struct CompilerCore; + friend struct EProlog; + friend struct EEpilog; +}; + +// ============================================================================ +// [AsmJit::EProlog] +// ============================================================================ + +//! @brief Prolog emittable. +struct ASMJIT_API EProlog : public Emittable +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a new @ref EProlog instance. + EProlog(Compiler* c, EFunction* f) ASMJIT_NOTHROW; + //! @brief Destroy the @ref EProlog instance. + virtual ~EProlog() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Emit] + // -------------------------------------------------------------------------- + + virtual void prepare(CompilerContext& cc) ASMJIT_NOTHROW; + virtual Emittable* translate(CompilerContext& cc) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Methods] + // -------------------------------------------------------------------------- + + //! @brief Get function associated with this prolog. + inline EFunction* getFunction() const ASMJIT_NOTHROW { return _function; } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +protected: + //! @brief Prolog owner function. + EFunction* _function; + +private: + friend struct CompilerCore; + friend struct EFunction; +}; + +// ============================================================================ +// [AsmJit::EEpilog] +// ============================================================================ + +//! @brief Epilog emittable. 
+struct ASMJIT_API EEpilog : public Emittable +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a new @ref EEpilog instance. + EEpilog(Compiler* c, EFunction* f) ASMJIT_NOTHROW; + //! @brief Destroy the @ref EProlog instance. + virtual ~EEpilog() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Emit] + // -------------------------------------------------------------------------- + + virtual void prepare(CompilerContext& cc) ASMJIT_NOTHROW; + virtual Emittable* translate(CompilerContext& cc) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Methods] + // -------------------------------------------------------------------------- + + //! @brief Get function associated with this epilog. + inline EFunction* getFunction() const ASMJIT_NOTHROW { return _function; } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +protected: + //! @brief Epilog owner function. + EFunction* _function; + +private: + friend struct CompilerCore; + friend struct EFunction; +}; + +// ============================================================================ +// [AsmJit::ECall] +// ============================================================================ + +//! @brief Function call. +struct ASMJIT_API ECall : public Emittable +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a new @ref ECall instance. + ECall(Compiler* c, EFunction* caller, const Operand* target) ASMJIT_NOTHROW; + //! @brief Destroy the @ref ECall instance. 
+ virtual ~ECall() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Emit] + // -------------------------------------------------------------------------- + + virtual void prepare(CompilerContext& cc) ASMJIT_NOTHROW; + virtual Emittable* translate(CompilerContext& cc) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Utilities] + // -------------------------------------------------------------------------- + + virtual int getMaxSize() const ASMJIT_NOTHROW; + virtual bool _tryUnuseVar(VarData* v) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Internal] + // -------------------------------------------------------------------------- + +protected: + + uint32_t _findTemporaryGpRegister(CompilerContext& cc) ASMJIT_NOTHROW; + uint32_t _findTemporaryXmmRegister(CompilerContext& cc) ASMJIT_NOTHROW; + + VarData* _getOverlappingVariable(CompilerContext& cc, + const FunctionPrototype::Argument& argType) const ASMJIT_NOTHROW; + + void _moveAllocatedVariableToStack(CompilerContext& cc, + VarData* vdata, const FunctionPrototype::Argument& argType) ASMJIT_NOTHROW; + + void _moveSpilledVariableToStack(CompilerContext& cc, + VarData* vdata, const FunctionPrototype::Argument& argType, + uint32_t temporaryGpReg, + uint32_t temporaryXmmReg) ASMJIT_NOTHROW; + + void _moveSrcVariableToRegister(CompilerContext& cc, + VarData* vdata, const FunctionPrototype::Argument& argType) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Function Prototype (Calling Convention + Arguments) / Return Value] + // -------------------------------------------------------------------------- + +public: + + //! @brief Get function prototype. + inline const FunctionPrototype& getPrototype() const ASMJIT_NOTHROW { return _functionPrototype; } + + //! @brief Set function prototype. 
+ inline void setPrototype(uint32_t cconv, const FunctionDefinition& def) ASMJIT_NOTHROW + { + _setPrototype( + cconv, + def.getArguments(), + def.getArgumentsCount(), + def.getReturnValue()); + } + + //! @brief Set function prototype (internal). + void _setPrototype( + uint32_t callingConvention, + const uint32_t* arguments, + uint32_t argumentsCount, + uint32_t returnValue) ASMJIT_NOTHROW; + + //! @brief Set function argument @a i to @a var. + bool setArgument(uint32_t i, const BaseVar& var) ASMJIT_NOTHROW; + //! @brief Set function argument @a i to @a imm. + bool setArgument(uint32_t i, const Imm& imm) ASMJIT_NOTHROW; + + //! @brief Set return value to + bool setReturn(const Operand& first, const Operand& second = Operand()) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Methods] + // -------------------------------------------------------------------------- + + //! @brief Get caller. + inline EFunction* getCaller() const ASMJIT_NOTHROW { return _caller; } + + //! @brief Get operand (function address). + inline Operand& getTarget() ASMJIT_NOTHROW { return _target; } + //! @overload + inline const Operand& getTarget() const ASMJIT_NOTHROW { return _target; } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +protected: + //! @brief Function prototype. + FunctionPrototype _functionPrototype; + + //! @brief Callee (the function that calls me). + EFunction* _caller; + + //! @brief Arguments (operands). + Operand* _args; + + //! @brief Operand (address of function, register, label, ...) + Operand _target; + + //! @brief Return value (operands) + Operand _ret[2]; + + //! @brief Mask of GP registers used as function arguments. + uint32_t _gpParams; + //! @brief Mask of MM registers used as function arguments. + uint32_t _mmParams; + //! 
@brief Mask of XMM registers used as function arguments.
+  uint32_t _xmmParams;
+
+  //! @brief Variables count.
+  uint32_t _variablesCount;
+
+  //! @brief Variables (extracted from operands).
+  VarCallRecord* _variables;
+  //! @brief Argument index to @c VarCallRecord.
+  VarCallRecord* _argumentToVarRecord[FUNC_MAX_ARGS];
+
+private:
+  friend struct CompilerCore;
+};
+
+// ============================================================================
+// [AsmJit::ERet]
+// ============================================================================
+
+//! @brief Function return.
+struct ASMJIT_API ERet : public Emittable
+{
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref ERet instance.
+  ERet(Compiler* c, EFunction* function, const Operand* first, const Operand* second) ASMJIT_NOTHROW;
+  //! @brief Destroy the @ref ERet instance.
+  virtual ~ERet() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+  virtual void prepare(CompilerContext& cc) ASMJIT_NOTHROW;
+  virtual Emittable* translate(CompilerContext& cc) ASMJIT_NOTHROW;
+  virtual void emit(Assembler& a) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Utilities]
+  // --------------------------------------------------------------------------
+
+  virtual int getMaxSize() const ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Methods]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get function.
+  inline EFunction* getFunction() ASMJIT_NOTHROW { return _function; }
+
+  //! @brief Get first return operand.
+  inline Operand& getFirst() ASMJIT_NOTHROW { return _ret[0]; }
+  //! @brief Get second return operand.
+  inline Operand& getSecond() ASMJIT_NOTHROW { return _ret[1]; }
+  //! @overload
+  inline const Operand& getFirst() const ASMJIT_NOTHROW { return _ret[0]; }
+  //! @overload
+  inline const Operand& getSecond() const ASMJIT_NOTHROW { return _ret[1]; }
+
+  //! @brief Get whether jump to epilog has to be emitted.
+  bool shouldEmitJumpToEpilog() const ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+protected:
+  //! @brief Function.
+  EFunction* _function;
+  //! @brief Return value (operands)
+  Operand _ret[2];
+
+private:
+  friend struct CompilerCore;
+};
+
+// ============================================================================
+// [AsmJit::CompilerContext]
+// ============================================================================
+
+//! @internal
+//!
+//! @brief Compiler context is used by @ref Compiler.
+//!
+//! Compiler context is used during compilation and normally a developer doesn't
+//! need access to it. The context is used per function (it's reset after each
+//! function is generated).
+struct ASMJIT_API CompilerContext
+{
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new @ref CompilerContext instance.
+  CompilerContext(Compiler* compiler) ASMJIT_NOTHROW;
+  //! @brief Destroy the @ref CompilerContext instance.
+  ~CompilerContext() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Clear]
+  // --------------------------------------------------------------------------
+
+  //! @brief Clear context, preparing it for next function generation.
+  void _clear() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Register Allocator]
+  // --------------------------------------------------------------------------
+
+  //! @brief Allocate variable.
+  //!
+  //! Calls @c allocGPVar, @c allocMMVar or @c allocXMMVar methods.
+  void allocVar(VarData* vdata, uint32_t regMask, uint32_t vflags) ASMJIT_NOTHROW;
+  //! @brief Save variable.
+  //!
+  //! Calls @c saveGPVar, @c saveMMVar or @c saveXMMVar methods.
+  void saveVar(VarData* vdata) ASMJIT_NOTHROW;
+  //! @brief Spill variable.
+  //!
+  //! Calls @c spillGPVar, @c spillMMVar or @c spillXMMVar methods.
+  void spillVar(VarData* vdata) ASMJIT_NOTHROW;
+  //! @brief Unuse variable (didn't spill, just forget about it).
+  void unuseVar(VarData* vdata, uint32_t toState) ASMJIT_NOTHROW;
+
+  //! @brief Helper method that is called for each variable per emittable.
+  inline void _unuseVarOnEndOfScope(Emittable* e, VarData* v)
+  {
+    // The variable's last use is this emittable, so its scope ends here.
+    if (v->lastEmittable == e)
+      unuseVar(v, VARIABLE_STATE_UNUSED);
+  }
+  //! @overload
+  inline void _unuseVarOnEndOfScope(Emittable* e, VarAllocRecord* rec)
+  {
+    VarData* v = rec->vdata;
+    // Also honor an explicit unuse-after-use request on the alloc record.
+    if (v->lastEmittable == e || (rec->vflags & VARIABLE_ALLOC_UNUSE_AFTER_USE))
+      unuseVar(v, VARIABLE_STATE_UNUSED);
+  }
+  //! @overload
+  inline void _unuseVarOnEndOfScope(Emittable* e, VarCallRecord* rec)
+  {
+    VarData* v = rec->vdata;
+    if (v->lastEmittable == e || (rec->flags & VarCallRecord::FLAG_UNUSE_AFTER_USE))
+      unuseVar(v, VARIABLE_STATE_UNUSED);
+  }
+
+  //! @brief Allocate variable (GP).
+  void allocGPVar(VarData* vdata, uint32_t regMask, uint32_t vflags) ASMJIT_NOTHROW;
+  //! @brief Save variable (GP).
+  void saveGPVar(VarData* vdata) ASMJIT_NOTHROW;
+  //! @brief Spill variable (GP).
+  void spillGPVar(VarData* vdata) ASMJIT_NOTHROW;
+
+  //! @brief Allocate variable (MM).
+  void allocMMVar(VarData* vdata, uint32_t regMask, uint32_t vflags) ASMJIT_NOTHROW;
+  //! @brief Save variable (MM).
+  void saveMMVar(VarData* vdata) ASMJIT_NOTHROW;
+  //! @brief Spill variable (MM).
+  void spillMMVar(VarData* vdata) ASMJIT_NOTHROW;
+
+  //! @brief Allocate variable (XMM).
+  void allocXMMVar(VarData* vdata, uint32_t regMask, uint32_t vflags) ASMJIT_NOTHROW;
+  //! @brief Save variable (XMM).
+  void saveXMMVar(VarData* vdata) ASMJIT_NOTHROW;
+  //! @brief Spill variable (XMM).
+  void spillXMMVar(VarData* vdata) ASMJIT_NOTHROW;
+
+  //! @brief Emit load variable instruction(s).
+  void emitLoadVar(VarData* vdata, uint32_t regIndex) ASMJIT_NOTHROW;
+  //! @brief Emit save variable instruction(s).
+  void emitSaveVar(VarData* vdata, uint32_t regIndex) ASMJIT_NOTHROW;
+
+  //! @brief Emit move variable instruction(s).
+  void emitMoveVar(VarData* vdata, uint32_t regIndex, uint32_t vflags) ASMJIT_NOTHROW;
+  //! @brief Emit exchange variable instruction(s).
+  void emitExchangeVar(VarData* vdata, uint32_t regIndex, uint32_t vflags, VarData* other) ASMJIT_NOTHROW;
+
+  //! @brief Called each time a variable is allocated.
+  void _postAlloc(VarData* vdata, uint32_t vflags) ASMJIT_NOTHROW;
+  //! @brief Marks variable home memory as used (must be called at least once
+  //! for each variable that uses function local memory - stack).
+  void _markMemoryUsed(VarData* vdata) ASMJIT_NOTHROW;
+
+  Mem _getVarMem(VarData* vdata) ASMJIT_NOTHROW;
+
+  // Spill-candidate selection, one per register class.
+  VarData* _getSpillCandidateGP() ASMJIT_NOTHROW;
+  VarData* _getSpillCandidateMM() ASMJIT_NOTHROW;
+  VarData* _getSpillCandidateXMM() ASMJIT_NOTHROW;
+  VarData* _getSpillCandidateGeneric(VarData** varArray, uint32_t count) ASMJIT_NOTHROW;
+
+  inline bool _isActive(VarData* vdata) ASMJIT_NOTHROW { return vdata->nextActive != NULL; }
+  void _addActive(VarData* vdata) ASMJIT_NOTHROW;
+  void _freeActive(VarData* vdata) ASMJIT_NOTHROW;
+  void _freeAllActive() ASMJIT_NOTHROW;
+
+  void _allocatedVariable(VarData* vdata) ASMJIT_NOTHROW;
+
+  // Marking a register allocated also marks it modified (it will be written).
+  inline void _allocatedGPRegister(uint32_t index) ASMJIT_NOTHROW { _state.usedGP |= Util::maskFromIndex(index); _modifiedGPRegisters |= Util::maskFromIndex(index); }
+  inline void _allocatedMMRegister(uint32_t index) ASMJIT_NOTHROW { _state.usedMM |= Util::maskFromIndex(index); _modifiedMMRegisters |= Util::maskFromIndex(index); }
+  inline void _allocatedXMMRegister(uint32_t index) ASMJIT_NOTHROW { _state.usedXMM |= Util::maskFromIndex(index); _modifiedXMMRegisters |= Util::maskFromIndex(index); }
+
+  // Freeing only clears the used bit; the modified mask stays set for the
+  // function's prolog/epilog bookkeeping.
+  inline void _freedGPRegister(uint32_t index) ASMJIT_NOTHROW { _state.usedGP &= ~Util::maskFromIndex(index); }
+  inline void _freedMMRegister(uint32_t index) ASMJIT_NOTHROW { _state.usedMM &= ~Util::maskFromIndex(index); }
+  inline void _freedXMMRegister(uint32_t index) ASMJIT_NOTHROW { _state.usedXMM &= ~Util::maskFromIndex(index); }
+
+  inline void _markGPRegisterModified(uint32_t index) ASMJIT_NOTHROW { _modifiedGPRegisters |= Util::maskFromIndex(index); }
+  inline void _markMMRegisterModified(uint32_t index) ASMJIT_NOTHROW { _modifiedMMRegisters |= Util::maskFromIndex(index); }
+  inline void _markXMMRegisterModified(uint32_t index) ASMJIT_NOTHROW { _modifiedXMMRegisters |= Util::maskFromIndex(index); }
+
+  // TODO: Find code which uses this and improve.
+  inline void _newRegisterHomeIndex(VarData* vdata, uint32_t idx)
+  {
+    // First home index wins; later calls only widen the preferred-register mask.
+    if (vdata->homeRegisterIndex == INVALID_VALUE) vdata->homeRegisterIndex = idx;
+    vdata->prefRegisterMask |= (1U << idx);
+  }
+
+  // TODO: Find code which uses this and improve.
+  inline void _newRegisterHomeMask(VarData* vdata, uint32_t mask)
+  {
+    vdata->prefRegisterMask |= mask;
+  }
+
+  // --------------------------------------------------------------------------
+  // [Operand Patcher]
+  // --------------------------------------------------------------------------
+
+  void translateOperands(Operand* operands, uint32_t count) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Accessors]
+  // --------------------------------------------------------------------------
+
+  inline Compiler* getCompiler() const ASMJIT_NOTHROW { return _compiler; }
+  inline EFunction* getFunction() const ASMJIT_NOTHROW { return _function; }
+
+  inline Emittable* getExtraBlock() const ASMJIT_NOTHROW { return _extraBlock; }
+  inline void setExtraBlock(Emittable* e) ASMJIT_NOTHROW { _extraBlock = e; }
+
+  // --------------------------------------------------------------------------
+  // [Backward Code]
+  // --------------------------------------------------------------------------
+
+  void addBackwardCode(EJmp* from) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Forward Jump]
+  // --------------------------------------------------------------------------
+
+  void addForwardJump(EJmp* inst) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [State]
+  // --------------------------------------------------------------------------
+
+  StateData* _saveState() ASMJIT_NOTHROW;
+  void _assignState(StateData* state) ASMJIT_NOTHROW;
+  void _restoreState(StateData* state, uint32_t targetOffset = INVALID_VALUE) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Memory Allocator]
+  // --------------------------------------------------------------------------
+
+  VarMemBlock* _allocMemBlock(uint32_t size) ASMJIT_NOTHROW;
+  void _freeMemBlock(VarMemBlock* mem) ASMJIT_NOTHROW;
+
+  void _allocMemoryOperands() ASMJIT_NOTHROW;
+  void _patchMemoryOperands(Emittable* start, Emittable* stop) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Members]
+  // --------------------------------------------------------------------------
+
+  //! @brief Zone memory manager.
+  Zone _zone;
+
+  //! @brief Compiler instance.
+  Compiler* _compiler;
+  //! @brief Function emittable.
+  EFunction* _function;
+
+  //! @brief Current active scope start emittable.
+  Emittable* _start;
+  //! @brief Current active scope end emittable.
+  Emittable* _stop;
+  //! @brief Emittable that is used to insert some code after the function body.
+  Emittable* _extraBlock;
+
+  //! @brief Current state (register allocator).
+  StateData _state;
+  //! @brief Link to circular double-linked list containing all active variables
+  //! (for current state).
+  VarData* _active;
+
+  //! @brief Forward jumps (single linked list).
+  ForwardJumpData* _forwardJumps;
+
+  //! @brief Current offset, used in prepare() stage. Each emittable should increment it.
+  uint32_t _currentOffset;
+
+  //! @brief Whether current code is unreachable.
+  uint32_t _unrecheable;
+
+  //! @brief Global modified GP registers mask (per function).
+  uint32_t _modifiedGPRegisters;
+  //! @brief Global modified MM registers mask (per function).
+  uint32_t _modifiedMMRegisters;
+  //! @brief Global modified XMM registers mask (per function).
+  uint32_t _modifiedXMMRegisters;
+
+  //! @brief Whether the EBP/RBP register can be used by register allocator.
+  uint32_t _allocableEBP;
+
+  //! @brief ESP adjust constant (changed during PUSH/POP or when using
+  //! the stack).
+  int _adjustESP;
+
+  //! @brief Function arguments base pointer (register).
+  uint32_t _argumentsBaseReg;
+  //! @brief Function arguments base offset.
+  int32_t _argumentsBaseOffset;
+  //! @brief Function arguments displacement.
+  int32_t _argumentsActualDisp;
+
+  //! @brief Function variables base pointer (register).
+  uint32_t _variablesBaseReg;
+  //! @brief Function variables base offset.
+  int32_t _variablesBaseOffset;
+  //! @brief Function variables displacement.
+  int32_t _variablesActualDisp;
+
+  //! @brief Used memory blocks (for variables, here is each created mem block
+  //! that can be also in _memFree list).
+  VarMemBlock* _memUsed;
+  //! @brief Free memory blocks (freed, prepared for another allocation).
+  VarMemBlock* _memFree;
+  //! @brief Count of 4-byte memory blocks used by the function.
+  uint32_t _mem4BlocksCount;
+  //! @brief Count of 8-byte memory blocks used by the function.
+  uint32_t _mem8BlocksCount;
+  //! @brief Count of 16-byte memory blocks used by the function.
+  uint32_t _mem16BlocksCount;
+  //! @brief Count of total bytes of stack memory used by the function.
+  uint32_t _memBytesTotal;
+
+  //! @brief Whether to emit comments.
+  bool _emitComments;
+
+  //! @brief List of emittables which need to be translated. These emittables
+  //! are filled by @c addBackwardCode().
+  PodVector<EJmp*> _backCode;
+  //! @brief Backward code position (starts at 0).
+  sysuint_t _backPos;
+};
+
+// ============================================================================
+// [AsmJit::CompilerUtil]
+// ============================================================================
+
+//! @brief Static class that contains utility methods.
+struct ASMJIT_API CompilerUtil
+{
+  static bool isStack16ByteAligned();
+};
+
+// ============================================================================
+// [AsmJit::CompilerCore]
+// ============================================================================
+
+//! @brief Compiler core.
+//!
+//!
@sa @c AsmJit::Compiler.
+struct ASMJIT_API CompilerCore
+{
+  // --------------------------------------------------------------------------
+  // [Construction / Destruction]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create new (empty) instance of @c Compiler.
+  CompilerCore(CodeGenerator* codeGenerator) ASMJIT_NOTHROW;
+  //! @brief Destroy @c Compiler instance.
+  virtual ~CompilerCore() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Code Generator]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get code generator.
+  inline CodeGenerator* getCodeGenerator() const { return _codeGenerator; }
+
+  // --------------------------------------------------------------------------
+  // [Memory Management]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get zone memory manager.
+  inline Zone& getZone() { return _zone; }
+
+  // --------------------------------------------------------------------------
+  // [Logging]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get logger.
+  inline Logger* getLogger() const ASMJIT_NOTHROW { return _logger; }
+
+  //! @brief Set logger to @a logger.
+  virtual void setLogger(Logger* logger) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Error Handling]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get error code.
+  inline uint32_t getError() const ASMJIT_NOTHROW { return _error; }
+
+  //! @brief Set error code.
+  //!
+  //! This method is virtual, because higher classes can use it to catch all
+  //! errors.
+  virtual void setError(uint32_t error) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Properties]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get compiler property.
+  uint32_t getProperty(uint32_t propertyId);
+  //! @brief Set compiler property.
+  void setProperty(uint32_t propertyId, uint32_t value);
+
+  // --------------------------------------------------------------------------
+  // [Buffer]
+  // --------------------------------------------------------------------------
+
+  //! @brief Clear everything, but do not deallocate buffers.
+  //!
+  //! @note This method will destroy your code.
+  void clear() ASMJIT_NOTHROW;
+
+  //! @brief Free internal buffer, all emitters and NULL all pointers.
+  //!
+  //! @note This method will destroy your code.
+  void free() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Emittables]
+  // --------------------------------------------------------------------------
+
+  //! @brief Get first emittable.
+  inline Emittable* getFirstEmittable() const ASMJIT_NOTHROW { return _first; }
+
+  //! @brief Get last emittable.
+  inline Emittable* getLastEmittable() const ASMJIT_NOTHROW { return _last; }
+
+  //! @brief Get current emittable.
+  //!
+  //! @note If this method returns @c NULL, it means that nothing has been emitted yet.
+  inline Emittable* getCurrentEmittable() const ASMJIT_NOTHROW { return _current; }
+
+  //! @brief Set new current emittable and return previous one.
+  Emittable* setCurrentEmittable(Emittable* current) ASMJIT_NOTHROW;
+
+  //! @brief Add emittable after current and set current to @a emittable.
+  void addEmittable(Emittable* emittable) ASMJIT_NOTHROW;
+
+  //! @brief Add emittable after @a ref.
+  void addEmittableAfter(Emittable* emittable, Emittable* ref) ASMJIT_NOTHROW;
+
+  //! @brief Remove emittable (and if needed set current to previous).
+  void removeEmittable(Emittable* emittable) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Comment]
+  // --------------------------------------------------------------------------
+
+  //! @brief Emit a single comment line that will be logged.
+  //!
+  //! Emitting comments is useful to log something. Because assembler can be
+  //! generated from AST or other data structures, you may sometimes need to
+  //! log data characteristics or statistics.
+  //!
+  //! @note Emitting comment is not directly sent to logger, but instead it's
+  //! stored in @c AsmJit::Compiler and emitted when @c serialize() method is
+  //! called. Each comment keeps correct order.
+  void comment(const char* fmt, ...) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Function Builder]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new function.
+  //!
+  //! @param cconv Calling convention to use (see @c CALL_CONV enum)
+  //! @param params Function arguments prototype.
+  //!
+  //! This method is usually used as a first step when generating functions
+  //! by @c Compiler. First parameter @a cconv specifies function calling
+  //! convention to use. Second parameter @a params specifies function
+  //! arguments. To create function arguments are used templates
+  //! @c BuildFunction0<>, @c BuildFunction1<...>, @c BuildFunction2<...>,
+  //! etc...
+  //!
+  //! Templates with BuildFunction prefix are used to generate argument IDs
+  //! based on real C++ types. See next example how to generate function with
+  //! two 32-bit integer arguments.
+  //!
+  //! @code
+  //! // Building function using AsmJit::Compiler example.
+  //!
+  //! // Compiler instance
+  //! Compiler c;
+  //!
+  //! // Begin of function (also emits function @c Prolog)
+  //! c.newFunction(
+  //!   // Default calling convention (32-bit cdecl or 64-bit for host OS)
+  //!
CALL_CONV_DEFAULT,
+  //!   // Using function builder to generate arguments list
+  //!   BuildFunction2<int, int>());
+  //!
+  //! // End of function (also emits function @c Epilog)
+  //! c.endFunction();
+  //! @endcode
+  //!
+  //! You can see that building functions is really easy. Previous code snippet
+  //! will generate code for function with two 32-bit integer arguments. You
+  //! can access arguments by @c AsmJit::Function::argument() method. Arguments
+  //! are indexed from 0 (like everything in C).
+  //!
+  //! @code
+  //! // Accessing function arguments through AsmJit::Function example.
+  //!
+  //! // Compiler instance
+  //! Compiler c;
+  //!
+  //! // Begin of function (also emits function @c Prolog)
+  //! c.newFunction(
+  //!   // Default calling convention (32-bit cdecl or 64-bit for host OS)
+  //!   CALL_CONV_DEFAULT,
+  //!   // Using function builder to generate arguments list
+  //!   BuildFunction2<int, int>());
+  //!
+  //! // Arguments are like other variables, you need to reference them by
+  //! // variable operands:
+  //! GPVar a0 = c.argGP(0);
+  //! GPVar a1 = c.argGP(1);
+  //!
+  //! // Use them.
+  //! c.add(a0, a1);
+  //!
+  //! // End of function (emits function epilog and return)
+  //! c.endFunction();
+  //! @endcode
+  //!
+  //! Arguments are like variables. How to manipulate variables is
+  //! documented in @c AsmJit::Compiler, variables section.
+  //!
+  //! @note To get current function use @c currentFunction() method or save
+  //! pointer to @c AsmJit::Function returned by @c AsmJit::Compiler::newFunction<>
+  //! method. Recommended is to save the pointer.
+  //!
+  //! @sa @c BuildFunction0, @c BuildFunction1, @c BuildFunction2, ...
+  inline EFunction* newFunction(uint32_t cconv, const FunctionDefinition& def) ASMJIT_NOTHROW
+  {
+    return newFunction_(
+      cconv,
+      def.getArguments(),
+      def.getArgumentsCount(),
+      def.getReturnValue());
+  }
+
+  //! @brief Create a new function (low level version).
+  //!
+  //! @param cconv Function calling convention (see @c AsmJit::CALL_CONV).
+  //! @param args Function arguments (see @c AsmJit::VARIABLE_TYPE).
+  //! @param count Arguments count.
+  //!
+  //! This method is internally called from @c newFunction() method and
+  //! contains arguments that are used internally by @c AsmJit::Compiler.
+  //!
+  //! @note To get current function use @c currentFunction() method.
+  EFunction* newFunction_(
+    uint32_t cconv,
+    const uint32_t* arguments,
+    uint32_t argumentsCount,
+    uint32_t returnValue) ASMJIT_NOTHROW;
+
+  //! @brief Get current function.
+  //!
+  //! This method can be called within @c newFunction() and @c endFunction()
+  //! block to get current function you are working with. It's recommended
+  //! to store @c AsmJit::Function pointer returned by @c newFunction<> method,
+  //! because this allows you in future implement function sections outside of
+  //! function itself (yeah, this is possible!).
+  inline EFunction* getFunction() const ASMJIT_NOTHROW { return _function; }
+
+  //! @brief End of current function scope and all variables.
+  EFunction* endFunction() ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Memory Management]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create a new instruction emittable in the zone; jump-family codes
+  //! get an @c EJmp, everything else a plain @c EInstruction.
+  inline EInstruction* newInstruction(uint32_t code, Operand* operandsData, uint32_t operandsCount) ASMJIT_NOTHROW
+  {
+    if (code >= _INST_J_BEGIN && code <= _INST_J_END)
+    {
+      void* addr = _zone.zalloc(sizeof(EJmp));
+
+      return new(addr) EJmp(
+        reinterpret_cast<Compiler*>(this), code, operandsData, operandsCount);
+    }
+    else
+    {
+      // Operands are stored inline after the EInstruction object.
+      void* addr = _zone.zalloc(sizeof(EInstruction) + operandsCount * sizeof(Operand));
+
+      return new(addr) EInstruction(
+        reinterpret_cast<Compiler*>(this), code, operandsData, operandsCount);
+    }
+  }
+
+  // --------------------------------------------------------------------------
+  // [Emit]
+  // --------------------------------------------------------------------------
+
+  //! @brief Emit instruction with no operand.
+  void _emitInstruction(uint32_t code) ASMJIT_NOTHROW;
+
+  //! @brief Emit instruction with one operand.
+  void _emitInstruction(uint32_t code, const Operand* o0) ASMJIT_NOTHROW;
+
+  //! @brief Emit instruction with two operands.
+  void _emitInstruction(uint32_t code, const Operand* o0, const Operand* o1) ASMJIT_NOTHROW;
+
+  //! @brief Emit instruction with three operands.
+  void _emitInstruction(uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2) ASMJIT_NOTHROW;
+
+  //! @brief Emit instruction with four operands (Special instructions).
+  void _emitInstruction(uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2, const Operand* o3) ASMJIT_NOTHROW;
+
+  //! @brief Emit instruction with five operands (Special instructions).
+  void _emitInstruction(uint32_t code, const Operand* o0, const Operand* o1, const Operand* o2, const Operand* o3, const Operand* o4) ASMJIT_NOTHROW;
+
+  //! @brief Private method for emitting jcc.
+  void _emitJcc(uint32_t code, const Label* label, uint32_t hint) ASMJIT_NOTHROW;
+
+  //! @brief Private method for emitting function call.
+  ECall* _emitCall(const Operand* o0) ASMJIT_NOTHROW;
+
+  //! @brief Private method for returning a value from the function.
+  void _emitReturn(const Operand* first, const Operand* second) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Embed]
+  // --------------------------------------------------------------------------
+
+  //! @brief Embed data into instruction stream.
+  void embed(const void* data, sysuint_t len) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Align]
+  // --------------------------------------------------------------------------
+
+  //! @brief Align target buffer to @a m bytes.
+  //!
+  //! Typical usage of this is to align labels at start of the inner loops.
+  //!
+  //! Inserts @c nop() instructions or CPU optimized NOPs.
+  void align(uint32_t m) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Label]
+  // --------------------------------------------------------------------------
+
+  //! @brief Create and return new label.
+  Label newLabel() ASMJIT_NOTHROW;
+
+  //! @brief Bind label to the current offset.
+  //!
+  //! @note Label can be bound only once!
+  void bind(const Label& label) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Variables]
+  // --------------------------------------------------------------------------
+
+  //! @internal
+  //!
+  //! @brief Create a new variable data.
+  VarData* _newVarData(const char* name, uint32_t type, uint32_t size) ASMJIT_NOTHROW;
+
+  //! @internal
+  //!
+  //! @brief Get variable data.
+  inline VarData* _getVarData(uint32_t id) const ASMJIT_NOTHROW
+  {
+    ASMJIT_ASSERT(id != INVALID_VALUE);
+    return _varData[id & OPERAND_ID_VALUE_MASK];
+  }
+
+  //! @brief Create a new general-purpose variable.
+  GPVar newGP(uint32_t variableType = VARIABLE_TYPE_GPN, const char* name = NULL) ASMJIT_NOTHROW;
+  //! @brief Get argument as general-purpose variable.
+  GPVar argGP(uint32_t index) ASMJIT_NOTHROW;
+
+  //! @brief Create a new MM variable.
+  MMVar newMM(uint32_t variableType = VARIABLE_TYPE_MM, const char* name = NULL) ASMJIT_NOTHROW;
+  //! @brief Get argument as MM variable.
+  MMVar argMM(uint32_t index) ASMJIT_NOTHROW;
+
+  //! @brief Create a new XMM variable.
+  XMMVar newXMM(uint32_t variableType = VARIABLE_TYPE_XMM, const char* name = NULL) ASMJIT_NOTHROW;
+  //! @brief Get argument as XMM variable.
+  XMMVar argXMM(uint32_t index) ASMJIT_NOTHROW;
+
+  //! @internal
+  //!
+  //! @brief Serialize variable hint.
+  void _vhint(BaseVar& var, uint32_t hintId, uint32_t hintValue) ASMJIT_NOTHROW;
+
+  //! @brief Alloc variable @a var.
+  void alloc(BaseVar& var) ASMJIT_NOTHROW;
+  //! @brief Alloc variable @a var using @a regIndex as a register index.
+  void alloc(BaseVar& var, uint32_t regIndex) ASMJIT_NOTHROW;
+  //! @brief Alloc variable @a var using @a reg as a demanded register.
+  void alloc(BaseVar& var, const BaseReg& reg) ASMJIT_NOTHROW;
+  //! @brief Spill variable @a var.
+  void spill(BaseVar& var) ASMJIT_NOTHROW;
+  //! @brief Save variable @a var if modified.
+  void save(BaseVar& var) ASMJIT_NOTHROW;
+  //! @brief Unuse variable @a var.
+  void unuse(BaseVar& var) ASMJIT_NOTHROW;
+
+  //! @brief Get memory home of variable @a var.
+  void getMemoryHome(BaseVar& var, GPVar* home, int* displacement = NULL);
+
+  //! @brief Set memory home of variable @a var.
+  //!
+  //! Default memory home location is on stack (ESP/RSP), but when needed the
+  //! behavior can be changed by this method.
+  //!
+  //! It is an error to chain memory home locations. For example the given
+  //! code is invalid:
+  //!
+  //! @code
+  //! Compiler c;
+  //!
+  //! ...
+  //! GPVar v0 = c.newGP();
+  //! GPVar v1 = c.newGP();
+  //! GPVar v2 = c.newGP();
+  //! GPVar v3 = c.newGP();
+  //!
+  //! c.setMemoryHome(v1, v0, 0); // Allowed, [v0] is memory home for v1.
+  //! c.setMemoryHome(v2, v0, 4); // Allowed, [v0+4] is memory home for v2.
+  //! c.setMemoryHome(v3, v2); // CHAINING, NOT ALLOWED!
+  //! @endcode
+  void setMemoryHome(BaseVar& var, const GPVar& home, int displacement = 0);
+
+  //! @brief Get priority of variable @a var.
+  uint32_t getPriority(BaseVar& var) const ASMJIT_NOTHROW;
+  //! @brief Set priority of variable @a var to @a priority.
+  void setPriority(BaseVar& var, uint32_t priority) ASMJIT_NOTHROW;
+
+  //! @brief Get save-on-unuse @a var property.
+  bool getSaveOnUnuse(BaseVar& var) const ASMJIT_NOTHROW;
+  //! @brief Set save-on-unuse @a var property to @a value.
+  void setSaveOnUnuse(BaseVar& var, bool value) ASMJIT_NOTHROW;
+
+  //! @brief Rename variable @a var to @a name.
+  //!
+  //! @note Only new name will appear in the logger.
+  void rename(BaseVar& var, const char* name) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [State]
+  // --------------------------------------------------------------------------
+
+  //! @internal
+  //!
+  //! @brief Create a new @ref StateData instance.
+  StateData* _newStateData(uint32_t memVarsCount) ASMJIT_NOTHROW;
+
+  // --------------------------------------------------------------------------
+  // [Make]
+  // --------------------------------------------------------------------------
+
+  //! @brief Make is a convenience method to make currently serialized code and
+  //! return pointer to generated function.
+  //!
+  //! What you need is only to cast this pointer to your function type and call
+  //! it. Note that if there was an error and calling @c getError() method does
+  //! not return @c ERROR_NONE (zero) then this function always returns @c NULL and
+  //! error value remains the same.
+  virtual void* make() ASMJIT_NOTHROW;
+
+  //!
@brief Method that will emit everything to @c Assembler instance @a a. + virtual void serialize(Assembler& a) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Data] + // -------------------------------------------------------------------------- + + //! @internal + //! + //! @brief Get target (emittable) from operand @a id (label id). + inline ETarget* _getTarget(uint32_t id) + { + ASMJIT_ASSERT((id & OPERAND_ID_TYPE_MASK) == OPERAND_ID_TYPE_LABEL); + return _targetData[id & OPERAND_ID_VALUE_MASK]; + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +protected: + + //! @brief Code generator. + CodeGenerator* _codeGenerator; + + //! @brief Zone memory management. + Zone _zone; + + //! @brief Logger. + Logger* _logger; + + //! @brief Last error code. + uint32_t _error; + + //! @brief Properties. + uint32_t _properties; + + //! @brief Contains options for next emitted instruction, clear after each emit. + uint32_t _emitOptions; + + //! @brief Whether compiler was finished the job (register allocator, etc...). + uint32_t _finished; + + //! @brief First emittable. + Emittable* _first; + //! @brief Last emittable. + Emittable* _last; + //! @brief Current emittable. + Emittable* _current; + + //! @brief Current function. + EFunction* _function; + + //! @brief Label data. + PodVector<ETarget*> _targetData; + + //! @brief Variable data. + PodVector<VarData*> _varData; + + //! @brief Variable name id (used to generate unique names per function). + int _varNameId; + + //! @brief Compiler context instance, only available after prepare(). 
+ CompilerContext* _cc; + + friend struct BaseVar; + friend struct CompilerContext; + friend struct EFunction; + friend struct EInstruction; +}; + +// ============================================================================ +// [AsmJit::CompilerIntrinsics] +// ============================================================================ + +//! @brief Implementation of @c Compiler intrinsics. +//! +//! Methods in this class are implemented here, because we wan't to hide them +//! in shared libraries. These methods should be never exported by C++ compiler. +//! +//! @sa @c AsmJit::Compiler. +struct ASMJIT_HIDDEN CompilerIntrinsics : public CompilerCore +{ + // Special X86 instructions: + // - cpuid, + // - cbw, cwde, cdqe, + // - cmpxchg + // - cmpxchg8b, cmpxchg16b, + // - daa, das, + // - imul, mul, idiv, div, + // - mov_ptr + // - lahf, sahf + // - maskmovq, maskmovdqu + // - enter, leave + // - ret + // - monitor, mwait + // - pop, popad, popfd, popfq, + // - push, pushad, pushfd, pushfq + // - rcl, rcr, rol, ror, sal, sar, shl, shr + // - shld, shrd + // - rdtsc. rdtscp + // - lodsb, lodsd, lodsq, lodsw + // - movsb, movsd, movsq, movsw + // - stosb, stosd, stosq, stosw + // - cmpsb, cmpsd, cmpsq, cmpsw + // - scasb, scasd, scasq, scasw + // + // Special X87 instructions: + // - fisttp + + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create @c CompilerIntrinsics instance. Always use @c AsmJit::Compiler. + inline CompilerIntrinsics(CodeGenerator* codeGenerator) ASMJIT_NOTHROW : + CompilerCore(codeGenerator) + { + } + + // -------------------------------------------------------------------------- + // [Embed] + // -------------------------------------------------------------------------- + + //! @brief Add 8-bit integer data to the instuction stream. 
+ inline void db(uint8_t x) ASMJIT_NOTHROW { embed(&x, 1); } + //! @brief Add 16-bit integer data to the instuction stream. + inline void dw(uint16_t x) ASMJIT_NOTHROW { embed(&x, 2); } + //! @brief Add 32-bit integer data to the instuction stream. + inline void dd(uint32_t x) ASMJIT_NOTHROW { embed(&x, 4); } + //! @brief Add 64-bit integer data to the instuction stream. + inline void dq(uint64_t x) ASMJIT_NOTHROW { embed(&x, 8); } + + //! @brief Add 8-bit integer data to the instuction stream. + inline void dint8(int8_t x) ASMJIT_NOTHROW { embed(&x, sizeof(int8_t)); } + //! @brief Add 8-bit integer data to the instuction stream. + inline void duint8(uint8_t x) ASMJIT_NOTHROW { embed(&x, sizeof(uint8_t)); } + + //! @brief Add 16-bit integer data to the instuction stream. + inline void dint16(int16_t x) ASMJIT_NOTHROW { embed(&x, sizeof(int16_t)); } + //! @brief Add 16-bit integer data to the instuction stream. + inline void duint16(uint16_t x) ASMJIT_NOTHROW { embed(&x, sizeof(uint16_t)); } + + //! @brief Add 32-bit integer data to the instuction stream. + inline void dint32(int32_t x) ASMJIT_NOTHROW { embed(&x, sizeof(int32_t)); } + //! @brief Add 32-bit integer data to the instuction stream. + inline void duint32(uint32_t x) ASMJIT_NOTHROW { embed(&x, sizeof(uint32_t)); } + + //! @brief Add 64-bit integer data to the instuction stream. + inline void dint64(int64_t x) ASMJIT_NOTHROW { embed(&x, sizeof(int64_t)); } + //! @brief Add 64-bit integer data to the instuction stream. + inline void duint64(uint64_t x) ASMJIT_NOTHROW { embed(&x, sizeof(uint64_t)); } + + //! @brief Add system-integer data to the instuction stream. + inline void dsysint(sysint_t x) ASMJIT_NOTHROW { embed(&x, sizeof(sysint_t)); } + //! @brief Add system-integer data to the instuction stream. + inline void dsysuint(sysuint_t x) ASMJIT_NOTHROW { embed(&x, sizeof(sysuint_t)); } + + //! @brief Add float data to the instuction stream. 
+ inline void dfloat(float x) ASMJIT_NOTHROW { embed(&x, sizeof(float)); } + //! @brief Add double data to the instuction stream. + inline void ddouble(double x) ASMJIT_NOTHROW { embed(&x, sizeof(double)); } + + //! @brief Add pointer data to the instuction stream. + inline void dptr(void* x) ASMJIT_NOTHROW { embed(&x, sizeof(void*)); } + + //! @brief Add MM data to the instuction stream. + inline void dmm(const MMData& x) ASMJIT_NOTHROW { embed(&x, sizeof(MMData)); } + //! @brief Add XMM data to the instuction stream. + inline void dxmm(const XMMData& x) ASMJIT_NOTHROW { embed(&x, sizeof(XMMData)); } + + //! @brief Add data to the instuction stream. + inline void data(const void* data, sysuint_t size) ASMJIT_NOTHROW { embed(data, size); } + + //! @brief Add data in a given structure instance to the instuction stream. + template<typename T> + inline void dstruct(const T& x) ASMJIT_NOTHROW { embed(&x, sizeof(T)); } + + // -------------------------------------------------------------------------- + // [Custom Instructions] + // -------------------------------------------------------------------------- + + // These emitters are used by custom compiler code (register alloc / spill, + // prolog / epilog generator, ...). + + inline void emit(uint32_t code) ASMJIT_NOTHROW + { + _emitInstruction(code); + } + + inline void emit(uint32_t code, const Operand& o0) ASMJIT_NOTHROW + { + _emitInstruction(code, &o0); + } + + inline void emit(uint32_t code, const Operand& o0, const Operand& o1) ASMJIT_NOTHROW + { + _emitInstruction(code, &o0, &o1); + } + + inline void emit(uint32_t code, const Operand& o0, const Operand& o1, const Operand& o2) ASMJIT_NOTHROW + { + _emitInstruction(code, &o0, &o1, &o2); + } + + // -------------------------------------------------------------------------- + // [X86 Instructions] + // -------------------------------------------------------------------------- + + //! @brief Add with Carry. 
+ inline void adc(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_ADC, &dst, &src); + } + //! @brief Add with Carry. + inline void adc(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_ADC, &dst, &src); + } + //! @brief Add with Carry. + inline void adc(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_ADC, &dst, &src); + } + //! @brief Add with Carry. + inline void adc(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_ADC, &dst, &src); + } + //! @brief Add with Carry. + inline void adc(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_ADC, &dst, &src); + } + + //! @brief Add. + inline void add(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_ADD, &dst, &src); + } + //! @brief Add. + inline void add(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_ADD, &dst, &src); + } + //! @brief Add. + inline void add(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_ADD, &dst, &src); + } + //! @brief Add. + inline void add(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_ADD, &dst, &src); + } + //! @brief Add. + inline void add(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_ADD, &dst, &src); + } + + //! @brief Logical And. + inline void and_(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_AND, &dst, &src); + } + //! @brief Logical And. + inline void and_(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_AND, &dst, &src); + } + //! @brief Logical And. + inline void and_(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_AND, &dst, &src); + } + //! @brief Logical And. + inline void and_(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_AND, &dst, &src); + } + //! @brief Logical And. + inline void and_(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_AND, &dst, &src); + } + + //! @brief Bit Scan Forward. 
+ inline void bsf(const GPVar& dst, const GPVar& src) + { + ASMJIT_ASSERT(!dst.isGPB()); + _emitInstruction(INST_BSF, &dst, &src); + } + //! @brief Bit Scan Forward. + inline void bsf(const GPVar& dst, const Mem& src) + { + ASMJIT_ASSERT(!dst.isGPB()); + _emitInstruction(INST_BSF, &dst, &src); + } + + //! @brief Bit Scan Reverse. + inline void bsr(const GPVar& dst, const GPVar& src) + { + ASMJIT_ASSERT(!dst.isGPB()); + _emitInstruction(INST_BSR, &dst, &src); + } + //! @brief Bit Scan Reverse. + inline void bsr(const GPVar& dst, const Mem& src) + { + ASMJIT_ASSERT(!dst.isGPB()); + _emitInstruction(INST_BSR, &dst, &src); + } + + //! @brief Byte swap (32-bit or 64-bit registers only) (i486). + inline void bswap(const GPVar& dst) + { + // ASMJIT_ASSERT(dst.getRegType() == REG_GPD || dst.getRegType() == REG_GPQ); + _emitInstruction(INST_BSWAP, &dst); + } + + //! @brief Bit test. + inline void bt(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_BT, &dst, &src); + } + //! @brief Bit test. + inline void bt(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_BT, &dst, &src); + } + //! @brief Bit test. + inline void bt(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_BT, &dst, &src); + } + //! @brief Bit test. + inline void bt(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_BT, &dst, &src); + } + + //! @brief Bit test and complement. + inline void btc(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_BTC, &dst, &src); + } + //! @brief Bit test and complement. + inline void btc(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_BTC, &dst, &src); + } + //! @brief Bit test and complement. + inline void btc(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_BTC, &dst, &src); + } + //! @brief Bit test and complement. + inline void btc(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_BTC, &dst, &src); + } + + //! @brief Bit test and reset. 
+ inline void btr(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_BTR, &dst, &src); + } + //! @brief Bit test and reset. + inline void btr(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_BTR, &dst, &src); + } + //! @brief Bit test and reset. + inline void btr(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_BTR, &dst, &src); + } + //! @brief Bit test and reset. + inline void btr(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_BTR, &dst, &src); + } + + //! @brief Bit test and set. + inline void bts(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_BTS, &dst, &src); + } + //! @brief Bit test and set. + inline void bts(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_BTS, &dst, &src); + } + //! @brief Bit test and set. + inline void bts(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_BTS, &dst, &src); + } + //! @brief Bit test and set. + inline void bts(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_BTS, &dst, &src); + } + + //! @brief Call Procedure. + inline ECall* call(const GPVar& dst) + { + return _emitCall(&dst); + } + //! @brief Call Procedure. + inline ECall* call(const Mem& dst) + { + return _emitCall(&dst); + } + //! @brief Call Procedure. + inline ECall* call(const Imm& dst) + { + return _emitCall(&dst); + } + //! @brief Call Procedure. + //! @overload + inline ECall* call(void* dst) + { + Imm imm((sysint_t)dst); + return _emitCall(&imm); + } + + //! @brief Call Procedure. + inline ECall* call(const Label& label) + { + return _emitCall(&label); + } + + //! @brief Convert Byte to Word (Sign Extend). + inline void cbw(const GPVar& dst) + { + _emitInstruction(INST_CBW, &dst); + } + + //! @brief Convert Word to DWord (Sign Extend). + inline void cwde(const GPVar& dst) + { + _emitInstruction(INST_CWDE, &dst); + } + +#if defined(ASMJIT_X64) + //! @brief Convert DWord to QWord (Sign Extend). 
+ inline void cdqe(const GPVar& dst) + { + _emitInstruction(INST_CDQE, &dst); + } +#endif // ASMJIT_X64 + + //! @brief Clear Carry flag + //! + //! This instruction clears the CF flag in the EFLAGS register. + inline void clc() + { + _emitInstruction(INST_CLC); + } + + //! @brief Clear Direction flag + //! + //! This instruction clears the DF flag in the EFLAGS register. + inline void cld() + { + _emitInstruction(INST_CLD); + } + + //! @brief Complement Carry Flag. + //! + //! This instruction complements the CF flag in the EFLAGS register. + //! (CF = NOT CF) + inline void cmc() + { + _emitInstruction(INST_CMC); + } + + //! @brief Conditional Move. + inline void cmov(CONDITION cc, const GPVar& dst, const GPVar& src) + { + _emitInstruction(ConditionToInstruction::toCMovCC(cc), &dst, &src); + } + + //! @brief Conditional Move. + inline void cmov(CONDITION cc, const GPVar& dst, const Mem& src) + { + _emitInstruction(ConditionToInstruction::toCMovCC(cc), &dst, &src); + } + + //! @brief Conditional Move. + inline void cmova (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVA , &dst, &src); } + //! @brief Conditional Move. + inline void cmova (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVA , &dst, &src); } + //! @brief Conditional Move. + inline void cmovae (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVAE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovae (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVAE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovb (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVB , &dst, &src); } + //! @brief Conditional Move. + inline void cmovb (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVB , &dst, &src); } + //! @brief Conditional Move. + inline void cmovbe (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVBE , &dst, &src); } + //! @brief Conditional Move. 
+ inline void cmovbe (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVBE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovc (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVC , &dst, &src); } + //! @brief Conditional Move. + inline void cmovc (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVC , &dst, &src); } + //! @brief Conditional Move. + inline void cmove (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVE , &dst, &src); } + //! @brief Conditional Move. + inline void cmove (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovg (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVG , &dst, &src); } + //! @brief Conditional Move. + inline void cmovg (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVG , &dst, &src); } + //! @brief Conditional Move. + inline void cmovge (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVGE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovge (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVGE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovl (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVL , &dst, &src); } + //! @brief Conditional Move. + inline void cmovl (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVL , &dst, &src); } + //! @brief Conditional Move. + inline void cmovle (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVLE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovle (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVLE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovna (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNA , &dst, &src); } + //! @brief Conditional Move. + inline void cmovna (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNA , &dst, &src); } + //! 
@brief Conditional Move. + inline void cmovnae(const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNAE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovnae(const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNAE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovnb (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNB , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnb (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNB , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnbe(const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNBE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovnbe(const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNBE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovnc (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNC , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnc (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNC , &dst, &src); } + //! @brief Conditional Move. + inline void cmovne (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovne (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovng (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNG , &dst, &src); } + //! @brief Conditional Move. + inline void cmovng (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNG , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnge(const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNGE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovnge(const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNGE, &dst, &src); } + //! @brief Conditional Move. 
+ inline void cmovnl (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNL , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnl (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNL , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnle(const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNLE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovnle(const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNLE, &dst, &src); } + //! @brief Conditional Move. + inline void cmovno (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNO , &dst, &src); } + //! @brief Conditional Move. + inline void cmovno (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNO , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnp (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNP , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnp (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNP , &dst, &src); } + //! @brief Conditional Move. + inline void cmovns (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNS , &dst, &src); } + //! @brief Conditional Move. + inline void cmovns (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNS , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnz (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVNZ , &dst, &src); } + //! @brief Conditional Move. + inline void cmovnz (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVNZ , &dst, &src); } + //! @brief Conditional Move. + inline void cmovo (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVO , &dst, &src); } + //! @brief Conditional Move. + inline void cmovo (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVO , &dst, &src); } + //! @brief Conditional Move. 
+ inline void cmovp (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVP , &dst, &src); } + //! @brief Conditional Move. + inline void cmovp (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVP , &dst, &src); } + //! @brief Conditional Move. + inline void cmovpe (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVPE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovpe (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVPE , &dst, &src); } + //! @brief Conditional Move. + inline void cmovpo (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVPO , &dst, &src); } + //! @brief Conditional Move. + inline void cmovpo (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVPO , &dst, &src); } + //! @brief Conditional Move. + inline void cmovs (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVS , &dst, &src); } + //! @brief Conditional Move. + inline void cmovs (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVS , &dst, &src); } + //! @brief Conditional Move. + inline void cmovz (const GPVar& dst, const GPVar& src) { _emitInstruction(INST_CMOVZ , &dst, &src); } + //! @brief Conditional Move. + inline void cmovz (const GPVar& dst, const Mem& src) { _emitInstruction(INST_CMOVZ , &dst, &src); } + + //! @brief Compare Two Operands. + inline void cmp(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_CMP, &dst, &src); + } + //! @brief Compare Two Operands. + inline void cmp(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_CMP, &dst, &src); + } + //! @brief Compare Two Operands. + inline void cmp(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_CMP, &dst, &src); + } + //! @brief Compare Two Operands. + inline void cmp(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_CMP, &dst, &src); + } + //! @brief Compare Two Operands. 
+ inline void cmp(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_CMP, &dst, &src); + } + + //! @brief Compare and Exchange (i486). + inline void cmpxchg(const GPVar cmp_1_eax, const GPVar& cmp_2, const GPVar& src) + { + ASMJIT_ASSERT(cmp_1_eax.getId() != src.getId()); + _emitInstruction(INST_CMPXCHG, &cmp_1_eax, &cmp_2, &src); + } + //! @brief Compare and Exchange (i486). + inline void cmpxchg(const GPVar cmp_1_eax, const Mem& cmp_2, const GPVar& src) + { + ASMJIT_ASSERT(cmp_1_eax.getId() != src.getId()); + _emitInstruction(INST_CMPXCHG, &cmp_1_eax, &cmp_2, &src); + } + + //! @brief Compares the 64-bit value in EDX:EAX with the memory operand (Pentium). + //! + //! If the values are equal, then this instruction stores the 64-bit value + //! in ECX:EBX into the memory operand and sets the zero flag. Otherwise, + //! this instruction copies the 64-bit memory operand into the EDX:EAX + //! registers and clears the zero flag. + inline void cmpxchg8b( + const GPVar& cmp_edx, const GPVar& cmp_eax, + const GPVar& cmp_ecx, const GPVar& cmp_ebx, + const Mem& dst) + { + ASMJIT_ASSERT(cmp_edx.getId() != cmp_eax.getId() && + cmp_eax.getId() != cmp_ecx.getId() && + cmp_ecx.getId() != cmp_ebx.getId()); + + _emitInstruction(INST_CMPXCHG8B, &cmp_edx, &cmp_eax, &cmp_ecx, &cmp_ebx, &dst); + } + +#if defined(ASMJIT_X64) + //! @brief Compares the 128-bit value in RDX:RAX with the memory operand (X64). + //! + //! If the values are equal, then this instruction stores the 128-bit value + //! in RCX:RBX into the memory operand and sets the zero flag. Otherwise, + //! this instruction copies the 128-bit memory operand into the RDX:RAX + //! registers and clears the zero flag. 
+ inline void cmpxchg16b( + const GPVar& cmp_edx, const GPVar& cmp_eax, + const GPVar& cmp_ecx, const GPVar& cmp_ebx, + const Mem& dst) + { + ASMJIT_ASSERT(cmp_edx.getId() != cmp_eax.getId() && + cmp_eax.getId() != cmp_ecx.getId() && + cmp_ecx.getId() != cmp_ebx.getId()); + + _emitInstruction(INST_CMPXCHG16B, &cmp_edx, &cmp_eax, &cmp_ecx, &cmp_ebx, &dst); + } +#endif // ASMJIT_X64 + + //! @brief CPU Identification (i486). + inline void cpuid( + const GPVar& inout_eax, + const GPVar& out_ebx, + const GPVar& out_ecx, + const GPVar& out_edx) + { + // Destination variables must be different. + ASMJIT_ASSERT(inout_eax.getId() != out_ebx.getId() && + out_ebx.getId() != out_ecx.getId() && + out_ecx.getId() != out_edx.getId()); + + _emitInstruction(INST_CPUID, &inout_eax, &out_ebx, &out_ecx, &out_edx); + } + +#if defined(ASMJIT_X86) + inline void daa(const GPVar& dst) + { + _emitInstruction(INST_DAA, &dst); + } +#endif // ASMJIT_X86 + +#if defined(ASMJIT_X86) + inline void das(const GPVar& dst) + { + _emitInstruction(INST_DAS, &dst); + } +#endif // ASMJIT_X86 + + //! @brief Decrement by 1. + //! @note This instruction can be slower than sub(dst, 1) + inline void dec(const GPVar& dst) + { + _emitInstruction(INST_DEC, &dst); + } + //! @brief Decrement by 1. + //! @note This instruction can be slower than sub(dst, 1) + inline void dec(const Mem& dst) + { + _emitInstruction(INST_DEC, &dst); + } + + //! @brief Unsigned divide. + //! + //! This instruction divides (unsigned) the value in the AL, AX, or EAX + //! register by the source operand and stores the result in the AX, + //! DX:AX, or EDX:EAX registers. + inline void div_lo_hi(const GPVar& dst_lo, const GPVar& dst_hi, const GPVar& src) + { + // Destination variables must be different. + ASMJIT_ASSERT(dst_lo.getId() != dst_hi.getId()); + + _emitInstruction(INST_DIV, &dst_lo, &dst_hi, &src); + } + //! @brief Unsigned divide. + //! 
@overload + inline void div_lo_hi(const GPVar& dst_lo, const GPVar& dst_hi, const Mem& src) + { + // Destination variables must be different. + ASMJIT_ASSERT(dst_lo.getId() != dst_hi.getId()); + + _emitInstruction(INST_DIV, &dst_lo, &dst_hi, &src); + } + +#if ASMJIT_NOT_SUPPORTED_BY_COMPILER + //! @brief Make Stack Frame for Procedure Parameters. + inline void enter(const Imm& imm16, const Imm& imm8) + { + _emitInstruction(INST_ENTER, &imm16, &imm8); + } +#endif // ASMJIT_NOT_SUPPORTED_BY_COMPILER + + //! @brief Signed divide. + //! + //! This instruction divides (signed) the value in the AL, AX, or EAX + //! register by the source operand and stores the result in the AX, + //! DX:AX, or EDX:EAX registers. + inline void idiv_lo_hi(const GPVar& dst_lo, const GPVar& dst_hi, const GPVar& src) + { + // Destination variables must be different. + ASMJIT_ASSERT(dst_lo.getId() != dst_hi.getId()); + + _emitInstruction(INST_IDIV, &dst_lo, &dst_hi, &src); + } + //! @brief Signed divide. + //! @overload + inline void idiv_lo_hi(const GPVar& dst_lo, const GPVar& dst_hi, const Mem& src) + { + // Destination variables must be different. + ASMJIT_ASSERT(dst_lo.getId() != dst_hi.getId()); + + _emitInstruction(INST_IDIV, &dst_lo, &dst_hi, &src); + } + + //! @brief Signed multiply. + //! + //! [dst_lo:dst_hi] = dst_hi * src. + inline void imul_lo_hi(const GPVar& dst_lo, const GPVar& dst_hi, const GPVar& src) + { + // Destination variables must be different. + ASMJIT_ASSERT(dst_lo.getId() != dst_hi.getId()); + + _emitInstruction(INST_IMUL, &dst_lo, &dst_hi, &src); + } + //! @overload + inline void imul_lo_hi(const GPVar& dst_lo, const GPVar& dst_hi, const Mem& src) + { + // Destination variables must be different. + ASMJIT_ASSERT(dst_lo.getId() != dst_hi.getId()); + + _emitInstruction(INST_IMUL, &dst_lo, &dst_hi, &src); + } + + //! @brief Signed multiply. + //! + //! Destination operand (the first operand) is multiplied by the source + //! operand (second operand). 
The destination operand is a general-purpose + //! register and the source operand is an immediate value, a general-purpose + //! register, or a memory location. The product is then stored in the + //! destination operand location. + inline void imul(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_IMUL, &dst, &src); + } + //! @brief Signed multiply. + //! @overload + inline void imul(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_IMUL, &dst, &src); + } + //! @brief Signed multiply. + //! @overload + inline void imul(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_IMUL, &dst, &src); + } + + //! @brief Signed multiply. + //! + //! source operand (which can be a general-purpose register or a memory + //! location) is multiplied by the second source operand (an immediate + //! value). The product is then stored in the destination operand + //! (a general-purpose register). + inline void imul(const GPVar& dst, const GPVar& src, const Imm& imm) + { + _emitInstruction(INST_IMUL, &dst, &src, &imm); + } + //! @overload + inline void imul(const GPVar& dst, const Mem& src, const Imm& imm) + { + _emitInstruction(INST_IMUL, &dst, &src, &imm); + } + + //! @brief Increment by 1. + //! @note This instruction can be slower than add(dst, 1) + inline void inc(const GPVar& dst) + { + _emitInstruction(INST_INC, &dst); + } + //! @brief Increment by 1. + //! @note This instruction can be slower than add(dst, 1) + inline void inc(const Mem& dst) + { + _emitInstruction(INST_INC, &dst); + } + + //! @brief Interrupt 3 - trap to debugger. + inline void int3() + { + _emitInstruction(INST_INT3); + } + + //! @brief Jump to label @a label if condition @a cc is met. + //! + //! This instruction checks the state of one or more of the status flags in + //! the EFLAGS register (CF, OF, PF, SF, and ZF) and, if the flags are in the + //! specified state (condition), performs a jump to the target instruction + //! specified by the destination operand. 
A condition code (cc) is associated + //! with each instruction to indicate the condition being tested for. If the + //! condition is not satisfied, the jump is not performed and execution + //! continues with the instruction following the Jcc instruction. + inline void j(CONDITION cc, const Label& label, uint32_t hint = HINT_NONE) + { + _emitJcc(ConditionToInstruction::toJCC(cc), &label, hint); + } + + //! @brief Jump to label @a label if condition is met. + inline void ja (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JA , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jae (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JAE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jb (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JB , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jbe (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JBE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jc (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JC , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void je (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jg (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JG , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jge (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JGE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jl (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JL , &label, hint); } + //! @brief Jump to label @a label if condition is met. 
+ inline void jle (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JLE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jna (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNA , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnae(const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNAE, &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnb (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNB , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnbe(const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNBE, &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnc (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNC , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jne (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jng (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNG , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnge(const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNGE, &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnl (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNL , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnle(const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNLE, &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jno (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNO , &label, hint); } + //! @brief Jump to label @a label if condition is met. 
+ inline void jnp (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNP , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jns (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNS , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jnz (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JNZ , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jo (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JO , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jp (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JP , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jpe (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JPE , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jpo (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JPO , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void js (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JS , &label, hint); } + //! @brief Jump to label @a label if condition is met. + inline void jz (const Label& label, uint32_t hint = HINT_NONE) { _emitJcc(INST_JZ , &label, hint); } + + //! @brief Jump. + //! @overload + inline void jmp(const GPVar& dst) + { + _emitInstruction(INST_JMP, &dst); + } + //! @brief Jump. + //! @overload + inline void jmp(const Mem& dst) + { + _emitInstruction(INST_JMP, &dst); + } + //! @brief Jump. + //! @overload + inline void jmp(const Imm& dst) + { + _emitInstruction(INST_JMP, &dst); + } + + //! @brief Jump. + //! @overload + inline void jmp(void* dst) + { + Imm imm((sysint_t)dst); + _emitInstruction(INST_JMP, &imm); + } + + //! @brief Jump. + //! + //! This instruction transfers program control to a different point + //! 
in the instruction stream without recording return information. + //! The destination (target) operand specifies the label of the + //! instruction being jumped to. + inline void jmp(const Label& label) + { + _emitInstruction(INST_JMP, &label); + } + //! @brief Load Effective Address + //! + //! This instruction computes the effective address of the second + //! operand (the source operand) and stores it in the first operand + //! (destination operand). The source operand is a memory address + //! (offset part) specified with one of the processors addressing modes. + //! The destination operand is a general-purpose register. + inline void lea(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_LEA, &dst, &src); + } + +#if ASMJIT_NOT_SUPPORTED_BY_COMPILER + //! @brief High Level Procedure Exit. + inline void leave() + { + _emitInstruction(INST_LEAVE); + } +#endif // ASMJIT_NOT_SUPPORTED_BY_COMPILER + + //! @brief Move. + //! + //! This instruction copies the second operand (source operand) to the first + //! operand (destination operand). The source operand can be an immediate + //! value, general-purpose register, segment register, or memory location. + //! The destination register can be a general-purpose register, segment + //! register, or memory location. Both operands must be the same size, which + //! can be a byte, a word, or a DWORD. + //! + //! @note To move MMX or SSE registers to/from GP registers or memory, use + //! corresponding functions: @c movd(), @c movq(), etc. Passing MMX or SSE + //! registers to @c mov() is illegal. + inline void mov(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_MOV, &dst, &src); + } + //! @brief Move. + //! @overload + inline void mov(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_MOV, &dst, &src); + } + //! @brief Move. + //! @overload + inline void mov(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_MOV, &dst, &src); + } + //! @brief Move. + //! 
@overload + inline void mov(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_MOV, &dst, &src); + } + //! @brief Move. + //! @overload + inline void mov(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_MOV, &dst, &src); + } + + //! @brief Move byte, word, dword or qword from absolute address @a src to + //! AL, AX, EAX or RAX register. + inline void mov_ptr(const GPVar& dst, void* src) + { + Imm imm((sysint_t)src); + _emitInstruction(INST_MOV_PTR, &dst, &imm); + } + + //! @brief Move byte, word, dword or qword from AL, AX, EAX or RAX register + //! to absolute address @a dst. + inline void mov_ptr(void* dst, const GPVar& src) + { + Imm imm((sysint_t)dst); + _emitInstruction(INST_MOV_PTR, &imm, &src); + } + + //! @brief Move with Sign-Extension. + //! + //! This instruction copies the contents of the source operand (register + //! or memory location) to the destination operand (register) and sign + //! extends the value to 16, 32 or 64-bits. + //! + //! @sa movsxd(). + void movsx(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_MOVSX, &dst, &src); + } + //! @brief Move with Sign-Extension. + //! @overload + void movsx(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVSX, &dst, &src); + } + +#if defined(ASMJIT_X64) + //! @brief Move DWord to QWord with sign-extension. + inline void movsxd(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_MOVSXD, &dst, &src); + } + //! @brief Move DWord to QWord with sign-extension. + //! @overload + inline void movsxd(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVSXD, &dst, &src); + } +#endif // ASMJIT_X64 + + //! @brief Move with Zero-Extend. + //! + //! This instruction copies the contents of the source operand (register + //! or memory location) to the destination operand (register) and zero + //! extends the value to 16 or 32-bits. The size of the converted value + //! depends on the operand-size attribute. 
+ inline void movzx(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_MOVZX, &dst, &src); + } + //! @brief Move with Zero-Extend. + //! @brief Overload + inline void movzx(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVZX, &dst, &src); + } + + //! @brief Unsigned multiply. + //! + //! Source operand (in a general-purpose register or memory location) + //! is multiplied by the value in the AL, AX, or EAX register (depending + //! on the operand size) and the product is stored in the AX, DX:AX, or + //! EDX:EAX registers, respectively. + inline void mul_lo_hi(const GPVar& dst_lo, const GPVar& dst_hi, const GPVar& src) + { + // Destination variables must be different. + ASMJIT_ASSERT(dst_lo.getId() != dst_hi.getId()); + + _emitInstruction(INST_MUL, &dst_lo, &dst_hi, &src); + } + //! @brief Unsigned multiply. + //! @overload + inline void mul_lo_hi(const GPVar& dst_lo, const GPVar& dst_hi, const Mem& src) + { + // Destination variables must be different. + ASMJIT_ASSERT(dst_lo.getId() != dst_hi.getId()); + + _emitInstruction(INST_MUL, &dst_lo, &dst_hi, &src); + } + + //! @brief Two's Complement Negation. + inline void neg(const GPVar& dst) + { + _emitInstruction(INST_NEG, &dst); + } + //! @brief Two's Complement Negation. + inline void neg(const Mem& dst) + { + _emitInstruction(INST_NEG, &dst); + } + + //! @brief No Operation. + //! + //! This instruction performs no operation. This instruction is a one-byte + //! instruction that takes up space in the instruction stream but does not + //! affect the machine context, except the EIP register. The NOP instruction + //! is an alias mnemonic for the XCHG (E)AX, (E)AX instruction. + inline void nop() + { + _emitInstruction(INST_NOP); + } + + //! @brief One's Complement Negation. + inline void not_(const GPVar& dst) + { + _emitInstruction(INST_NOT, &dst); + } + //! @brief One's Complement Negation. + inline void not_(const Mem& dst) + { + _emitInstruction(INST_NOT, &dst); + } + + //! 
@brief Logical Inclusive OR. + inline void or_(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_OR, &dst, &src); + } + //! @brief Logical Inclusive OR. + inline void or_(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_OR, &dst, &src); + } + //! @brief Logical Inclusive OR. + inline void or_(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_OR, &dst, &src); + } + //! @brief Logical Inclusive OR. + inline void or_(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_OR, &dst, &src); + } + //! @brief Logical Inclusive OR. + inline void or_(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_OR, &dst, &src); + } + + //! @brief Pop a Value from the Stack. + //! + //! This instruction loads the value from the top of the stack to the location + //! specified with the destination operand and then increments the stack pointer. + //! The destination operand can be a general purpose register, memory location, + //! or segment register. + inline void pop(const GPVar& dst) + { + _emitInstruction(INST_POP, &dst); + } + + inline void pop(const Mem& dst) + { + ASMJIT_ASSERT(dst.getSize() == 2 || dst.getSize() == sizeof(sysint_t)); + _emitInstruction(INST_POP, &dst); + } + +#if defined(ASMJIT_X86) + //! @brief Pop All General-Purpose Registers. + //! + //! Pop EDI, ESI, EBP, EBX, EDX, ECX, and EAX. + inline void popad() + { + _emitInstruction(INST_POPAD); + } +#endif // ASMJIT_X86 + + //! @brief Pop Stack into EFLAGS Register (32-bit or 64-bit). + inline void popf() + { +#if defined(ASMJIT_X86) + popfd(); +#else + popfq(); +#endif + } + +#if defined(ASMJIT_X86) + //! @brief Pop Stack into EFLAGS Register (32-bit). + inline void popfd() { _emitInstruction(INST_POPFD); } +#else + //! @brief Pop Stack into EFLAGS Register (64-bit). + inline void popfq() { _emitInstruction(INST_POPFQ); } +#endif + + //! @brief Push WORD/DWORD/QWORD Onto the Stack. + //! + //! @note 32-bit architecture pushed DWORD while 64-bit + //! 
pushes QWORD. 64-bit mode not provides instruction to + //! push 32-bit register/memory. + inline void push(const GPVar& src) + { + _emitInstruction(INST_PUSH, &src); + } + //! @brief Push WORD/DWORD/QWORD Onto the Stack. + inline void push(const Mem& src) + { + ASMJIT_ASSERT(src.getSize() == 2 || src.getSize() == sizeof(sysint_t)); + _emitInstruction(INST_PUSH, &src); + } + //! @brief Push WORD/DWORD/QWORD Onto the Stack. + inline void push(const Imm& src) + { + _emitInstruction(INST_PUSH, &src); + } + +#if defined(ASMJIT_X86) + //! @brief Push All General-Purpose Registers. + //! + //! Push EAX, ECX, EDX, EBX, original ESP, EBP, ESI, and EDI. + inline void pushad() + { + _emitInstruction(INST_PUSHAD); + } +#endif // ASMJIT_X86 + + //! @brief Push EFLAGS Register (32-bit or 64-bit) onto the Stack. + inline void pushf() + { +#if defined(ASMJIT_X86) + pushfd(); +#else + pushfq(); +#endif + } + +#if defined(ASMJIT_X86) + //! @brief Push EFLAGS Register (32-bit) onto the Stack. + inline void pushfd() { _emitInstruction(INST_PUSHFD); } +#else + //! @brief Push EFLAGS Register (64-bit) onto the Stack. + inline void pushfq() { _emitInstruction(INST_PUSHFQ); } +#endif // ASMJIT_X86 + + //! @brief Rotate Bits Left. + //! @note @a src register can be only @c cl. + inline void rcl(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_RCL, &dst, &src); + } + //! @brief Rotate Bits Left. + inline void rcl(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_RCL, &dst, &src); + } + //! @brief Rotate Bits Left. + //! @note @a src register can be only @c cl. + inline void rcl(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_RCL, &dst, &src); + } + //! @brief Rotate Bits Left. + inline void rcl(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_RCL, &dst, &src); + } + + //! @brief Rotate Bits Right. + //! @note @a src register can be only @c cl. 
+ inline void rcr(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_RCR, &dst, &src); + } + //! @brief Rotate Bits Right. + inline void rcr(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_RCR, &dst, &src); + } + //! @brief Rotate Bits Right. + //! @note @a src register can be only @c cl. + inline void rcr(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_RCR, &dst, &src); + } + //! @brief Rotate Bits Right. + inline void rcr(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_RCR, &dst, &src); + } + + //! @brief Read Time-Stamp Counter (Pentium). + inline void rdtsc(const GPVar& dst_edx, const GPVar& dst_eax) + { + // Destination registers must be different. + ASMJIT_ASSERT(dst_edx.getId() != dst_eax.getId()); + + _emitInstruction(INST_RDTSC, &dst_edx, &dst_eax); + } + + //! @brief Read Time-Stamp Counter and Processor ID (New). + inline void rdtscp(const GPVar& dst_edx, const GPVar& dst_eax, const GPVar& dst_ecx) + { + // Destination registers must be different. + ASMJIT_ASSERT(dst_edx.getId() != dst_eax.getId() && + dst_eax.getId() != dst_ecx.getId()); + + _emitInstruction(INST_RDTSCP, &dst_edx, &dst_eax, &dst_ecx); + } + + //! @brief Load ECX/RCX BYTEs from DS:[ESI/RSI] to AL. + inline void rep_lodsb(const GPVar& dst_val, const GPVar& src_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to dst=EAX,RAX, src=DS:ESI/RSI, cnt=ECX/RCX. + ASMJIT_ASSERT(dst_val.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REP_LODSB, &dst_val, &src_addr, &cnt_ecx); + } + + //! @brief Load ECX/RCX DWORDs from DS:[ESI/RSI] to EAX. + inline void rep_lodsd(const GPVar& dst_val, const GPVar& src_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to dst=EAX,RAX, src=DS:ESI/RSI, cnt=ECX/RCX. 
+ ASMJIT_ASSERT(dst_val.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REP_LODSD, &dst_val, &src_addr, &cnt_ecx); + } + +#if defined(ASMJIT_X64) + //! @brief Load ECX/RCX QWORDs from DS:[ESI/RSI] to RAX. + inline void rep_lodsq(const GPVar& dst_val, const GPVar& src_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to dst=EAX,RAX, src=DS:ESI/RSI, cnt=ECX/RCX. + ASMJIT_ASSERT(dst_val.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REP_LODSQ, &dst_val, &src_addr, &cnt_ecx); + } +#endif // ASMJIT_X64 + //! @brief Load ECX/RCX WORDs from DS:[ESI/RSI] to AX. + inline void rep_lodsw(const GPVar& dst_val, const GPVar& src_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to dst=EAX,RAX, src=DS:ESI/RSI, cnt=ECX/RCX. + ASMJIT_ASSERT(dst_val.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REP_LODSW, &dst_val, &src_addr, &cnt_ecx); + } + + //! @brief Move ECX/RCX BYTEs from DS:[ESI/RSI] to ES:[EDI/RDI]. + inline void rep_movsb(const GPVar& dst_addr, const GPVar& src_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=DS:ESI/RSI, cnt=ECX/RCX. + ASMJIT_ASSERT(dst_addr.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REP_MOVSB, &dst_addr, &src_addr, &cnt_ecx); + } + + //! @brief Move ECX/RCX DWORDs from DS:[ESI/RSI] to ES:[EDI/RDI]. + inline void rep_movsd(const GPVar& dst_addr, const GPVar& src_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=DS:ESI/RSI, cnt=ECX/RCX. + ASMJIT_ASSERT(dst_addr.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REP_MOVSD, &dst_addr, &src_addr, &cnt_ecx); + } + +#if defined(ASMJIT_X64) + //! 
@brief Move ECX/RCX QWORDs from DS:[ESI/RSI] to ES:[EDI/RDI]. + inline void rep_movsq(const GPVar& dst_addr, const GPVar& src_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=DS:ESI/RSI, cnt=ECX/RCX. + ASMJIT_ASSERT(dst_addr.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REP_MOVSQ, &dst_addr, &src_addr, &cnt_ecx); + } +#endif // ASMJIT_X64 + + //! @brief Move ECX/RCX WORDs from DS:[ESI/RSI] to ES:[EDI/RDI]. + inline void rep_movsw(const GPVar& dst_addr, const GPVar& src_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=DS:ESI/RSI, cnt=ECX/RCX. + ASMJIT_ASSERT(dst_addr.getId() != src_addr.getId() && src_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REP_MOVSW, &dst_addr, &src_addr, &cnt_ecx); + } + + //! @brief Fill ECX/RCX BYTEs at ES:[EDI/RDI] with AL. + inline void rep_stosb(const GPVar& dst_addr, const GPVar& src_val, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=EAX/RAX, cnt=ECX/RCX. + ASMJIT_ASSERT(dst_addr.getId() != src_val.getId() && src_val.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REP_STOSB, &dst_addr, &src_val, &cnt_ecx); + } + + //! @brief Fill ECX/RCX DWORDs at ES:[EDI/RDI] with EAX. + inline void rep_stosd(const GPVar& dst_addr, const GPVar& src_val, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=EAX/RAX, cnt=ECX/RCX. + ASMJIT_ASSERT(dst_addr.getId() != src_val.getId() && src_val.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REP_STOSD, &dst_addr, &src_val, &cnt_ecx); + } + +#if defined(ASMJIT_X64) + //! @brief Fill ECX/RCX QWORDs at ES:[EDI/RDI] with RAX. 
+ inline void rep_stosq(const GPVar& dst_addr, const GPVar& src_val, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=EAX/RAX, cnt=ECX/RCX. + ASMJIT_ASSERT(dst_addr.getId() != src_val.getId() && src_val.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REP_STOSQ, &dst_addr, &src_val, &cnt_ecx); + } +#endif // ASMJIT_X64 + + //! @brief Fill ECX/RCX WORDs at ES:[EDI/RDI] with AX. + inline void rep_stosw(const GPVar& dst_addr, const GPVar& src_val, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to dst=ES:EDI,RDI, src=EAX/RAX, cnt=ECX/RCX. + ASMJIT_ASSERT(dst_addr.getId() != src_val.getId() && src_val.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REP_STOSW, &dst_addr, &src_val, &cnt_ecx); + } + + //! @brief Repeated find nonmatching BYTEs in ES:[EDI/RDI] and DS:[ESI/RDI]. + inline void repe_cmpsb(const GPVar& cmp1_addr, const GPVar& cmp2_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPE_CMPSB, &cmp1_addr, &cmp2_addr, &cnt_ecx); + } + + //! @brief Repeated find nonmatching DWORDs in ES:[EDI/RDI] and DS:[ESI/RDI]. + inline void repe_cmpsd(const GPVar& cmp1_addr, const GPVar& cmp2_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPE_CMPSD, &cmp1_addr, &cmp2_addr, &cnt_ecx); + } + +#if defined(ASMJIT_X64) + //! @brief Repeated find nonmatching QWORDs in ES:[EDI/RDI] and DS:[ESI/RDI]. 
+ inline void repe_cmpsq(const GPVar& cmp1_addr, const GPVar& cmp2_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPE_CMPSQ, &cmp1_addr, &cmp2_addr, &cnt_ecx); + } +#endif // ASMJIT_X64 + + //! @brief Repeated find nonmatching WORDs in ES:[EDI/RDI] and DS:[ESI/RDI]. + inline void repe_cmpsw(const GPVar& cmp1_addr, const GPVar& cmp2_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPE_CMPSW, &cmp1_addr, &cmp2_addr, &cnt_ecx); + } + + //! @brief Find non-AL BYTE starting at ES:[EDI/RDI]. + inline void repe_scasb(const GPVar& cmp1_addr, const GPVar& cmp2_val, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=AL, cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPE_SCASB, &cmp1_addr, &cmp2_val, &cnt_ecx); + } + + //! @brief Find non-EAX DWORD starting at ES:[EDI/RDI]. + inline void repe_scasd(const GPVar& cmp1_addr, const GPVar& cmp2_val, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=EAX, cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPE_SCASD, &cmp1_addr, &cmp2_val, &cnt_ecx); + } + +#if defined(ASMJIT_X64) + //! @brief Find non-RAX QWORD starting at ES:[EDI/RDI]. 
+ inline void repe_scasq(const GPVar& cmp1_addr, const GPVar& cmp2_val, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=RAX, cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPE_SCASQ, &cmp1_addr, &cmp2_val, &cnt_ecx); + } +#endif // ASMJIT_X64 + + //! @brief Find non-AX WORD starting at ES:[EDI/RDI]. + inline void repe_scasw(const GPVar& cmp1_addr, const GPVar& cmp2_val, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=AX, cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPE_SCASW, &cmp1_addr, &cmp2_val, &cnt_ecx); + } + + //! @brief Find matching BYTEs in [RDI] and [RSI]. + inline void repne_cmpsb(const GPVar& cmp1_addr, const GPVar& cmp2_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPNE_CMPSB, &cmp1_addr, &cmp2_addr, &cnt_ecx); + } + + //! @brief Find matching DWORDs in [RDI] and [RSI]. + inline void repne_cmpsd(const GPVar& cmp1_addr, const GPVar& cmp2_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPNE_CMPSD, &cmp1_addr, &cmp2_addr, &cnt_ecx); + } + +#if defined(ASMJIT_X64) + //! @brief Find matching QWORDs in [RDI] and [RSI]. + inline void repne_cmpsq(const GPVar& cmp1_addr, const GPVar& cmp2_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX. 
+ ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPNE_CMPSQ, &cmp1_addr, &cmp2_addr, &cnt_ecx); + } +#endif // ASMJIT_X64 + + //! @brief Find matching WORDs in [RDI] and [RSI]. + inline void repne_cmpsw(const GPVar& cmp1_addr, const GPVar& cmp2_addr, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, cmp2=ES:[EDI/RDI], cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_addr.getId() && cmp2_addr.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPNE_CMPSW, &cmp1_addr, &cmp2_addr, &cnt_ecx); + } + + //! @brief Find AL, starting at ES:[EDI/RDI]. + inline void repne_scasb(const GPVar& cmp1_addr, const GPVar& cmp2_val, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=AL, cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPNE_SCASB, &cmp1_addr, &cmp2_val, &cnt_ecx); + } + + //! @brief Find EAX, starting at ES:[EDI/RDI]. + inline void repne_scasd(const GPVar& cmp1_addr, const GPVar& cmp2_val, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=EAX, cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPNE_SCASD, &cmp1_addr, &cmp2_val, &cnt_ecx); + } + +#if defined(ASMJIT_X64) + //! @brief Find RAX, starting at ES:[EDI/RDI]. + inline void repne_scasq(const GPVar& cmp1_addr, const GPVar& cmp2_val, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=RAX, cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPNE_SCASQ, &cmp1_addr, &cmp2_val, &cnt_ecx); + } +#endif // ASMJIT_X64 + + //! 
@brief Find AX, starting at ES:[EDI/RDI]. + inline void repne_scasw(const GPVar& cmp1_addr, const GPVar& cmp2_val, const GPVar& cnt_ecx) + { + // All registers must be unique, they will be reallocated to cmp1=ES:EDI,RDI, src=AX, cnt=ECX/RCX. + ASMJIT_ASSERT(cmp1_addr.getId() != cmp2_val.getId() && cmp2_val.getId() != cnt_ecx.getId()); + + _emitInstruction(INST_REPNE_SCASW, &cmp1_addr, &cmp2_val, &cnt_ecx); + } + + //! @brief Return from Procedure. + inline void ret() + { + _emitReturn(NULL, NULL); + } + + //! @brief Return from Procedure. + inline void ret(const GPVar& first) + { + _emitReturn(&first, NULL); + } + + //! @brief Return from Procedure. + inline void ret(const GPVar& first, const GPVar& second) + { + _emitReturn(&first, &second); + } + + //! @brief Return from Procedure. + inline void ret(const XMMVar& first) + { + _emitReturn(&first, NULL); + } + + //! @brief Return from Procedure. + inline void ret(const XMMVar& first, const XMMVar& second) + { + _emitReturn(&first, &second); + } + + //! @brief Rotate Bits Left. + //! @note @a src register can be only @c cl. + inline void rol(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_ROL, &dst, &src); + } + //! @brief Rotate Bits Left. + inline void rol(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_ROL, &dst, &src); + } + //! @brief Rotate Bits Left. + //! @note @a src register can be only @c cl. + inline void rol(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_ROL, &dst, &src); + } + //! @brief Rotate Bits Left. + inline void rol(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_ROL, &dst, &src); + } + + //! @brief Rotate Bits Right. + //! @note @a src register can be only @c cl. + inline void ror(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_ROR, &dst, &src); + } + //! @brief Rotate Bits Right. + inline void ror(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_ROR, &dst, &src); + } + //! @brief Rotate Bits Right. + //! 
@note @a src register can be only @c cl. + inline void ror(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_ROR, &dst, &src); + } + //! @brief Rotate Bits Right. + inline void ror(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_ROR, &dst, &src); + } + +#if defined(ASMJIT_X86) + //! @brief Store @a var (allocated to AH/AX/EAX/RAX) into Flags. + inline void sahf(const GPVar& var) + { + _emitInstruction(INST_SAHF, &var); + } +#endif // ASMJIT_X86 + + //! @brief Integer subtraction with borrow. + inline void sbb(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_SBB, &dst, &src); + } + //! @brief Integer subtraction with borrow. + inline void sbb(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_SBB, &dst, &src); + } + //! @brief Integer subtraction with borrow. + inline void sbb(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_SBB, &dst, &src); + } + //! @brief Integer subtraction with borrow. + inline void sbb(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_SBB, &dst, &src); + } + //! @brief Integer subtraction with borrow. + inline void sbb(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_SBB, &dst, &src); + } + + //! @brief Shift Bits Left. + //! @note @a src register can be only @c cl. + inline void sal(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_SAL, &dst, &src); + } + //! @brief Shift Bits Left. + inline void sal(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_SAL, &dst, &src); + } + //! @brief Shift Bits Left. + //! @note @a src register can be only @c cl. + inline void sal(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_SAL, &dst, &src); + } + //! @brief Shift Bits Left. + inline void sal(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_SAL, &dst, &src); + } + + //! @brief Shift Bits Right. + //! @note @a src register can be only @c cl. 
+ inline void sar(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_SAR, &dst, &src); + } + //! @brief Shift Bits Right. + inline void sar(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_SAR, &dst, &src); + } + //! @brief Shift Bits Right. + //! @note @a src register can be only @c cl. + inline void sar(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_SAR, &dst, &src); + } + //! @brief Shift Bits Right. + inline void sar(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_SAR, &dst, &src); + } + + //! @brief Set Byte on Condition. + inline void set(CONDITION cc, const GPVar& dst) + { + ASMJIT_ASSERT(dst.getSize() == 1); + _emitInstruction(ConditionToInstruction::toSetCC(cc), &dst); + } + + //! @brief Set Byte on Condition. + inline void set(CONDITION cc, const Mem& dst) + { + ASMJIT_ASSERT(dst.getSize() <= 1); + _emitInstruction(ConditionToInstruction::toSetCC(cc), &dst); + } + + //! @brief Set Byte on Condition. + inline void seta (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETA , &dst); } + //! @brief Set Byte on Condition. + inline void seta (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETA , &dst); } + //! @brief Set Byte on Condition. + inline void setae (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETAE , &dst); } + //! @brief Set Byte on Condition. + inline void setae (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETAE , &dst); } + //! @brief Set Byte on Condition. + inline void setb (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETB , &dst); } + //! @brief Set Byte on Condition. + inline void setb (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETB , &dst); } + //! @brief Set Byte on Condition. + inline void setbe (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETBE , &dst); } + //! 
@brief Set Byte on Condition. + inline void setbe (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETBE , &dst); } + //! @brief Set Byte on Condition. + inline void setc (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETC , &dst); } + //! @brief Set Byte on Condition. + inline void setc (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETC , &dst); } + //! @brief Set Byte on Condition. + inline void sete (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETE , &dst); } + //! @brief Set Byte on Condition. + inline void sete (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETE , &dst); } + //! @brief Set Byte on Condition. + inline void setg (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETG , &dst); } + //! @brief Set Byte on Condition. + inline void setg (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETG , &dst); } + //! @brief Set Byte on Condition. + inline void setge (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETGE , &dst); } + //! @brief Set Byte on Condition. + inline void setge (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETGE , &dst); } + //! @brief Set Byte on Condition. + inline void setl (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETL , &dst); } + //! @brief Set Byte on Condition. + inline void setl (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETL , &dst); } + //! @brief Set Byte on Condition. + inline void setle (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETLE , &dst); } + //! @brief Set Byte on Condition. + inline void setle (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETLE , &dst); } + //! @brief Set Byte on Condition. 
+ inline void setna (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNA , &dst); } + //! @brief Set Byte on Condition. + inline void setna (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNA , &dst); } + //! @brief Set Byte on Condition. + inline void setnae(const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNAE, &dst); } + //! @brief Set Byte on Condition. + inline void setnae(const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNAE, &dst); } + //! @brief Set Byte on Condition. + inline void setnb (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNB , &dst); } + //! @brief Set Byte on Condition. + inline void setnb (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNB , &dst); } + //! @brief Set Byte on Condition. + inline void setnbe(const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNBE, &dst); } + //! @brief Set Byte on Condition. + inline void setnbe(const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNBE, &dst); } + //! @brief Set Byte on Condition. + inline void setnc (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNC , &dst); } + //! @brief Set Byte on Condition. + inline void setnc (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNC , &dst); } + //! @brief Set Byte on Condition. + inline void setne (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNE , &dst); } + //! @brief Set Byte on Condition. + inline void setne (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNE , &dst); } + //! @brief Set Byte on Condition. + inline void setng (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNG , &dst); } + //! @brief Set Byte on Condition. 
+ inline void setng (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNG , &dst); } + //! @brief Set Byte on Condition. + inline void setnge(const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNGE, &dst); } + //! @brief Set Byte on Condition. + inline void setnge(const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNGE, &dst); } + //! @brief Set Byte on Condition. + inline void setnl (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNL , &dst); } + //! @brief Set Byte on Condition. + inline void setnl (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNL , &dst); } + //! @brief Set Byte on Condition. + inline void setnle(const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNLE, &dst); } + //! @brief Set Byte on Condition. + inline void setnle(const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNLE, &dst); } + //! @brief Set Byte on Condition. + inline void setno (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNO , &dst); } + //! @brief Set Byte on Condition. + inline void setno (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNO , &dst); } + //! @brief Set Byte on Condition. + inline void setnp (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNP , &dst); } + //! @brief Set Byte on Condition. + inline void setnp (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNP , &dst); } + //! @brief Set Byte on Condition. + inline void setns (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNS , &dst); } + //! @brief Set Byte on Condition. + inline void setns (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNS , &dst); } + //! @brief Set Byte on Condition. 
+ inline void setnz (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETNZ , &dst); } + //! @brief Set Byte on Condition. + inline void setnz (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETNZ , &dst); } + //! @brief Set Byte on Condition. + inline void seto (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETO , &dst); } + //! @brief Set Byte on Condition. + inline void seto (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETO , &dst); } + //! @brief Set Byte on Condition. + inline void setp (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETP , &dst); } + //! @brief Set Byte on Condition. + inline void setp (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETP , &dst); } + //! @brief Set Byte on Condition. + inline void setpe (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETPE , &dst); } + //! @brief Set Byte on Condition. + inline void setpe (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETPE , &dst); } + //! @brief Set Byte on Condition. + inline void setpo (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETPO , &dst); } + //! @brief Set Byte on Condition. + inline void setpo (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETPO , &dst); } + //! @brief Set Byte on Condition. + inline void sets (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETS , &dst); } + //! @brief Set Byte on Condition. + inline void sets (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETS , &dst); } + //! @brief Set Byte on Condition. + inline void setz (const GPVar& dst) { ASMJIT_ASSERT(dst.getSize() == 1); _emitInstruction(INST_SETZ , &dst); } + //! @brief Set Byte on Condition. 
+ inline void setz (const Mem& dst) { ASMJIT_ASSERT(dst.getSize() <= 1); _emitInstruction(INST_SETZ , &dst); } + + //! @brief Shift Bits Left. + //! @note @a src register can be only @c cl. + inline void shl(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_SHL, &dst, &src); + } + //! @brief Shift Bits Left. + inline void shl(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_SHL, &dst, &src); + } + //! @brief Shift Bits Left. + //! @note @a src register can be only @c cl. + inline void shl(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_SHL, &dst, &src); + } + //! @brief Shift Bits Left. + inline void shl(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_SHL, &dst, &src); + } + + //! @brief Shift Bits Right. + //! @note @a src register can be only @c cl. + inline void shr(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_SHR, &dst, &src); + } + //! @brief Shift Bits Right. + inline void shr(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_SHR, &dst, &src); + } + //! @brief Shift Bits Right. + //! @note @a src register can be only @c cl. + inline void shr(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_SHR, &dst, &src); + } + //! @brief Shift Bits Right. + inline void shr(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_SHR, &dst, &src); + } + + //! @brief Double Precision Shift Left. + //! @note src2 register can be only @c cl register. + inline void shld(const GPVar& dst, const GPVar& src1, const GPVar& src2) + { + _emitInstruction(INST_SHLD, &dst, &src1, &src2); + } + //! @brief Double Precision Shift Left. + inline void shld(const GPVar& dst, const GPVar& src1, const Imm& src2) + { + _emitInstruction(INST_SHLD, &dst, &src1, &src2); + } + //! @brief Double Precision Shift Left. + //! @note src2 register can be only @c cl register. 
+ inline void shld(const Mem& dst, const GPVar& src1, const GPVar& src2) + { + _emitInstruction(INST_SHLD, &dst, &src1, &src2); + } + //! @brief Double Precision Shift Left. + inline void shld(const Mem& dst, const GPVar& src1, const Imm& src2) + { + _emitInstruction(INST_SHLD, &dst, &src1, &src2); + } + + //! @brief Double Precision Shift Right. + //! @note src2 register can be only @c cl register. + inline void shrd(const GPVar& dst, const GPVar& src1, const GPVar& src2) + { + _emitInstruction(INST_SHRD, &dst, &src1, &src2); + } + //! @brief Double Precision Shift Right. + inline void shrd(const GPVar& dst, const GPVar& src1, const Imm& src2) + { + _emitInstruction(INST_SHRD, &dst, &src1, &src2); + } + //! @brief Double Precision Shift Right. + //! @note src2 register can be only @c cl register. + inline void shrd(const Mem& dst, const GPVar& src1, const GPVar& src2) + { + _emitInstruction(INST_SHRD, &dst, &src1, &src2); + } + //! @brief Double Precision Shift Right. + inline void shrd(const Mem& dst, const GPVar& src1, const Imm& src2) + { + _emitInstruction(INST_SHRD, &dst, &src1, &src2); + } + + //! @brief Set Carry Flag to 1. + inline void stc() + { + _emitInstruction(INST_STC); + } + + //! @brief Set Direction Flag to 1. + inline void std() + { + _emitInstruction(INST_STD); + } + + //! @brief Subtract. + inline void sub(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_SUB, &dst, &src); + } + //! @brief Subtract. + inline void sub(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_SUB, &dst, &src); + } + //! @brief Subtract. + inline void sub(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_SUB, &dst, &src); + } + //! @brief Subtract. + inline void sub(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_SUB, &dst, &src); + } + //! @brief Subtract. + inline void sub(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_SUB, &dst, &src); + } + + //! @brief Logical Compare. 
+ inline void test(const GPVar& op1, const GPVar& op2) + { + _emitInstruction(INST_TEST, &op1, &op2); + } + //! @brief Logical Compare. + inline void test(const GPVar& op1, const Imm& op2) + { + _emitInstruction(INST_TEST, &op1, &op2); + } + //! @brief Logical Compare. + inline void test(const Mem& op1, const GPVar& op2) + { + _emitInstruction(INST_TEST, &op1, &op2); + } + //! @brief Logical Compare. + inline void test(const Mem& op1, const Imm& op2) + { + _emitInstruction(INST_TEST, &op1, &op2); + } + + //! @brief Undefined instruction - Raise invalid opcode exception. + inline void ud2() + { + _emitInstruction(INST_UD2); + } + + //! @brief Exchange and Add. + inline void xadd(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_XADD, &dst, &src); + } + //! @brief Exchange and Add. + inline void xadd(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_XADD, &dst, &src); + } + + //! @brief Exchange Register/Memory with Register. + inline void xchg(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_XCHG, &dst, &src); + } + //! @brief Exchange Register/Memory with Register. + inline void xchg(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_XCHG, &dst, &src); + } + //! @brief Exchange Register/Memory with Register. + inline void xchg(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_XCHG, &src, &dst); + } + + //! @brief Exchange Register/Memory with Register. + inline void xor_(const GPVar& dst, const GPVar& src) + { + _emitInstruction(INST_XOR, &dst, &src); + } + //! @brief Exchange Register/Memory with Register. + inline void xor_(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_XOR, &dst, &src); + } + //! @brief Exchange Register/Memory with Register. + inline void xor_(const GPVar& dst, const Imm& src) + { + _emitInstruction(INST_XOR, &dst, &src); + } + //! @brief Exchange Register/Memory with Register. 
+ inline void xor_(const Mem& dst, const GPVar& src) + { + _emitInstruction(INST_XOR, &dst, &src); + } + //! @brief Exchange Register/Memory with Register. + inline void xor_(const Mem& dst, const Imm& src) + { + _emitInstruction(INST_XOR, &dst, &src); + } + + // -------------------------------------------------------------------------- + // [MMX] + // -------------------------------------------------------------------------- + + //! @brief Empty MMX state. + inline void emms() + { + _emitInstruction(INST_EMMS); + } + + //! @brief Move DWord (MMX). + inline void movd(const Mem& dst, const MMVar& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + //! @brief Move DWord (MMX). + inline void movd(const GPVar& dst, const MMVar& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + //! @brief Move DWord (MMX). + inline void movd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + //! @brief Move DWord (MMX). + inline void movd(const MMVar& dst, const GPVar& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + + //! @brief Move QWord (MMX). + inline void movq(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } + //! @brief Move QWord (MMX). + inline void movq(const Mem& dst, const MMVar& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#if defined(ASMJIT_X64) + //! @brief Move QWord (MMX). + inline void movq(const GPVar& dst, const MMVar& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#endif + //! @brief Move QWord (MMX). + inline void movq(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#if defined(ASMJIT_X64) + //! @brief Move QWord (MMX). + inline void movq(const MMVar& dst, const GPVar& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#endif + + //! @brief Pack with Unsigned Saturation (MMX). + inline void packuswb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PACKUSWB, &dst, &src); + } + //! 
@brief Pack with Unsigned Saturation (MMX). + inline void packuswb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PACKUSWB, &dst, &src); + } + + //! @brief Packed BYTE Add (MMX). + inline void paddb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PADDB, &dst, &src); + } + //! @brief Packed BYTE Add (MMX). + inline void paddb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PADDB, &dst, &src); + } + + //! @brief Packed WORD Add (MMX). + inline void paddw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PADDW, &dst, &src); + } + //! @brief Packed WORD Add (MMX). + inline void paddw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PADDW, &dst, &src); + } + + //! @brief Packed DWORD Add (MMX). + inline void paddd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PADDD, &dst, &src); + } + //! @brief Packed DWORD Add (MMX). + inline void paddd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PADDD, &dst, &src); + } + + //! @brief Packed Add with Saturation (MMX). + inline void paddsb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PADDSB, &dst, &src); + } + //! @brief Packed Add with Saturation (MMX). + inline void paddsb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PADDSB, &dst, &src); + } + + //! @brief Packed Add with Saturation (MMX). + inline void paddsw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PADDSW, &dst, &src); + } + //! @brief Packed Add with Saturation (MMX). + inline void paddsw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PADDSW, &dst, &src); + } + + //! @brief Packed Add Unsigned with Saturation (MMX). + inline void paddusb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PADDUSB, &dst, &src); + } + //! @brief Packed Add Unsigned with Saturation (MMX). + inline void paddusb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PADDUSB, &dst, &src); + } + + //! 
@brief Packed Add Unsigned with Saturation (MMX). + inline void paddusw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PADDUSW, &dst, &src); + } + //! @brief Packed Add Unsigned with Saturation (MMX). + inline void paddusw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PADDUSW, &dst, &src); + } + + //! @brief Logical AND (MMX). + inline void pand(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PAND, &dst, &src); + } + //! @brief Logical AND (MMX). + inline void pand(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PAND, &dst, &src); + } + + //! @brief Logical AND Not (MMX). + inline void pandn(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PANDN, &dst, &src); + } + //! @brief Logical AND Not (MMX). + inline void pandn(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PANDN, &dst, &src); + } + + //! @brief Packed Compare for Equal (BYTES) (MMX). + inline void pcmpeqb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PCMPEQB, &dst, &src); + } + //! @brief Packed Compare for Equal (BYTES) (MMX). + inline void pcmpeqb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PCMPEQB, &dst, &src); + } + + //! @brief Packed Compare for Equal (WORDS) (MMX). + inline void pcmpeqw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PCMPEQW, &dst, &src); + } + //! @brief Packed Compare for Equal (WORDS) (MMX). + inline void pcmpeqw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PCMPEQW, &dst, &src); + } + + //! @brief Packed Compare for Equal (DWORDS) (MMX). + inline void pcmpeqd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PCMPEQD, &dst, &src); + } + //! @brief Packed Compare for Equal (DWORDS) (MMX). + inline void pcmpeqd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PCMPEQD, &dst, &src); + } + + //! @brief Packed Compare for Greater Than (BYTES) (MMX). 
+ inline void pcmpgtb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PCMPGTB, &dst, &src); + } + //! @brief Packed Compare for Greater Than (BYTES) (MMX). + inline void pcmpgtb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PCMPGTB, &dst, &src); + } + + //! @brief Packed Compare for Greater Than (WORDS) (MMX). + inline void pcmpgtw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PCMPGTW, &dst, &src); + } + //! @brief Packed Compare for Greater Than (WORDS) (MMX). + inline void pcmpgtw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PCMPGTW, &dst, &src); + } + + //! @brief Packed Compare for Greater Than (DWORDS) (MMX). + inline void pcmpgtd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PCMPGTD, &dst, &src); + } + //! @brief Packed Compare for Greater Than (DWORDS) (MMX). + inline void pcmpgtd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PCMPGTD, &dst, &src); + } + + //! @brief Packed Multiply High (MMX). + inline void pmulhw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PMULHW, &dst, &src); + } + //! @brief Packed Multiply High (MMX). + inline void pmulhw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMULHW, &dst, &src); + } + + //! @brief Packed Multiply Low (MMX). + inline void pmullw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PMULLW, &dst, &src); + } + //! @brief Packed Multiply Low (MMX). + inline void pmullw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMULLW, &dst, &src); + } + + //! @brief Bitwise Logical OR (MMX). + inline void por(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_POR, &dst, &src); + } + //! @brief Bitwise Logical OR (MMX). + inline void por(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_POR, &dst, &src); + } + + //! @brief Packed Multiply and Add (MMX). 
+ inline void pmaddwd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PMADDWD, &dst, &src); + } + //! @brief Packed Multiply and Add (MMX). + inline void pmaddwd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMADDWD, &dst, &src); + } + + //! @brief Packed Shift Left Logical (MMX). + inline void pslld(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSLLD, &dst, &src); + } + //! @brief Packed Shift Left Logical (MMX). + inline void pslld(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSLLD, &dst, &src); + } + //! @brief Packed Shift Left Logical (MMX). + inline void pslld(const MMVar& dst, const Imm& src) + { + _emitInstruction(INST_PSLLD, &dst, &src); + } + + //! @brief Packed Shift Left Logical (MMX). + inline void psllq(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSLLQ, &dst, &src); + } + //! @brief Packed Shift Left Logical (MMX). + inline void psllq(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSLLQ, &dst, &src); + } + //! @brief Packed Shift Left Logical (MMX). + inline void psllq(const MMVar& dst, const Imm& src) + { + _emitInstruction(INST_PSLLQ, &dst, &src); + } + + //! @brief Packed Shift Left Logical (MMX). + inline void psllw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSLLW, &dst, &src); + } + //! @brief Packed Shift Left Logical (MMX). + inline void psllw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSLLW, &dst, &src); + } + //! @brief Packed Shift Left Logical (MMX). + inline void psllw(const MMVar& dst, const Imm& src) + { + _emitInstruction(INST_PSLLW, &dst, &src); + } + + //! @brief Packed Shift Right Arithmetic (MMX). + inline void psrad(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSRAD, &dst, &src); + } + //! @brief Packed Shift Right Arithmetic (MMX). + inline void psrad(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSRAD, &dst, &src); + } + //! 
@brief Packed Shift Right Arithmetic (MMX). + inline void psrad(const MMVar& dst, const Imm& src) + { + _emitInstruction(INST_PSRAD, &dst, &src); + } + + //! @brief Packed Shift Right Arithmetic (MMX). + inline void psraw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSRAW, &dst, &src); + } + //! @brief Packed Shift Right Arithmetic (MMX). + inline void psraw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSRAW, &dst, &src); + } + //! @brief Packed Shift Right Arithmetic (MMX). + inline void psraw(const MMVar& dst, const Imm& src) + { + _emitInstruction(INST_PSRAW, &dst, &src); + } + + //! @brief Packed Shift Right Logical (MMX). + inline void psrld(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSRLD, &dst, &src); + } + //! @brief Packed Shift Right Logical (MMX). + inline void psrld(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSRLD, &dst, &src); + } + //! @brief Packed Shift Right Logical (MMX). + inline void psrld(const MMVar& dst, const Imm& src) + { + _emitInstruction(INST_PSRLD, &dst, &src); + } + + //! @brief Packed Shift Right Logical (MMX). + inline void psrlq(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSRLQ, &dst, &src); + } + //! @brief Packed Shift Right Logical (MMX). + inline void psrlq(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSRLQ, &dst, &src); + } + //! @brief Packed Shift Right Logical (MMX). + inline void psrlq(const MMVar& dst, const Imm& src) + { + _emitInstruction(INST_PSRLQ, &dst, &src); + } + + //! @brief Packed Shift Right Logical (MMX). + inline void psrlw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSRLW, &dst, &src); + } + //! @brief Packed Shift Right Logical (MMX). + inline void psrlw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSRLW, &dst, &src); + } + //! @brief Packed Shift Right Logical (MMX). 
+ inline void psrlw(const MMVar& dst, const Imm& src) + { + _emitInstruction(INST_PSRLW, &dst, &src); + } + + //! @brief Packed Subtract (MMX). + inline void psubb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSUBB, &dst, &src); + } + //! @brief Packed Subtract (MMX). + inline void psubb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSUBB, &dst, &src); + } + + //! @brief Packed Subtract (MMX). + inline void psubw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSUBW, &dst, &src); + } + //! @brief Packed Subtract (MMX). + inline void psubw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSUBW, &dst, &src); + } + + //! @brief Packed Subtract (MMX). + inline void psubd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSUBD, &dst, &src); + } + //! @brief Packed Subtract (MMX). + inline void psubd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSUBD, &dst, &src); + } + + //! @brief Packed Subtract with Saturation (MMX). + inline void psubsb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSUBSB, &dst, &src); + } + //! @brief Packed Subtract with Saturation (MMX). + inline void psubsb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSUBSB, &dst, &src); + } + + //! @brief Packed Subtract with Saturation (MMX). + inline void psubsw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSUBSW, &dst, &src); + } + //! @brief Packed Subtract with Saturation (MMX). + inline void psubsw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSUBSW, &dst, &src); + } + + //! @brief Packed Subtract with Unsigned Saturation (MMX). + inline void psubusb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSUBUSB, &dst, &src); + } + //! @brief Packed Subtract with Unsigned Saturation (MMX). + inline void psubusb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSUBUSB, &dst, &src); + } + + //! 
@brief Packed Subtract with Unsigned Saturation (MMX). + inline void psubusw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSUBUSW, &dst, &src); + } + //! @brief Packed Subtract with Unsigned Saturation (MMX). + inline void psubusw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSUBUSW, &dst, &src); + } + + //! @brief Unpack High Packed Data (MMX). + inline void punpckhbw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PUNPCKHBW, &dst, &src); + } + //! @brief Unpack High Packed Data (MMX). + inline void punpckhbw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKHBW, &dst, &src); + } + + //! @brief Unpack High Packed Data (MMX). + inline void punpckhwd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PUNPCKHWD, &dst, &src); + } + //! @brief Unpack High Packed Data (MMX). + inline void punpckhwd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKHWD, &dst, &src); + } + + //! @brief Unpack High Packed Data (MMX). + inline void punpckhdq(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PUNPCKHDQ, &dst, &src); + } + //! @brief Unpack High Packed Data (MMX). + inline void punpckhdq(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKHDQ, &dst, &src); + } + + //! @brief Unpack High Packed Data (MMX). + inline void punpcklbw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PUNPCKLBW, &dst, &src); + } + //! @brief Unpack High Packed Data (MMX). + inline void punpcklbw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKLBW, &dst, &src); + } + + //! @brief Unpack High Packed Data (MMX). + inline void punpcklwd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PUNPCKLWD, &dst, &src); + } + //! @brief Unpack High Packed Data (MMX). + inline void punpcklwd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKLWD, &dst, &src); + } + + //! @brief Unpack High Packed Data (MMX). 
+ inline void punpckldq(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PUNPCKLDQ, &dst, &src); + } + //! @brief Unpack High Packed Data (MMX). + inline void punpckldq(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PUNPCKLDQ, &dst, &src); + } + + //! @brief Bitwise Exclusive OR (MMX). + inline void pxor(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PXOR, &dst, &src); + } + //! @brief Bitwise Exclusive OR (MMX). + inline void pxor(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PXOR, &dst, &src); + } + + // -------------------------------------------------------------------------- + // [3dNow] + // -------------------------------------------------------------------------- + + //! @brief Faster EMMS (3dNow!). + //! + //! @note Use only for early AMD processors where is only 3dNow! or SSE. If + //! CPU contains SSE2, it's better to use @c emms() ( @c femms() is mapped + //! to @c emms() ). + inline void femms() + { + _emitInstruction(INST_FEMMS); + } + + //! @brief Packed SP-FP to Integer Convert (3dNow!). + inline void pf2id(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PF2ID, &dst, &src); + } + //! @brief Packed SP-FP to Integer Convert (3dNow!). + inline void pf2id(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PF2ID, &dst, &src); + } + + //! @brief Packed SP-FP to Integer Word Convert (3dNow!). + inline void pf2iw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PF2IW, &dst, &src); + } + //! @brief Packed SP-FP to Integer Word Convert (3dNow!). + inline void pf2iw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PF2IW, &dst, &src); + } + + //! @brief Packed SP-FP Accumulate (3dNow!). + inline void pfacc(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFACC, &dst, &src); + } + //! @brief Packed SP-FP Accumulate (3dNow!). 
+ inline void pfacc(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFACC, &dst, &src); + } + + //! @brief Packed SP-FP Addition (3dNow!). + inline void pfadd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFADD, &dst, &src); + } + //! @brief Packed SP-FP Addition (3dNow!). + inline void pfadd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFADD, &dst, &src); + } + + //! @brief Packed SP-FP Compare - dst == src (3dNow!). + inline void pfcmpeq(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFCMPEQ, &dst, &src); + } + //! @brief Packed SP-FP Compare - dst == src (3dNow!). + inline void pfcmpeq(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFCMPEQ, &dst, &src); + } + + //! @brief Packed SP-FP Compare - dst >= src (3dNow!). + inline void pfcmpge(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFCMPGE, &dst, &src); + } + //! @brief Packed SP-FP Compare - dst >= src (3dNow!). + inline void pfcmpge(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFCMPGE, &dst, &src); + } + + //! @brief Packed SP-FP Compare - dst > src (3dNow!). + inline void pfcmpgt(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFCMPGT, &dst, &src); + } + //! @brief Packed SP-FP Compare - dst > src (3dNow!). + inline void pfcmpgt(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFCMPGT, &dst, &src); + } + + //! @brief Packed SP-FP Maximum (3dNow!). + inline void pfmax(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFMAX, &dst, &src); + } + //! @brief Packed SP-FP Maximum (3dNow!). + inline void pfmax(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFMAX, &dst, &src); + } + + //! @brief Packed SP-FP Minimum (3dNow!). + inline void pfmin(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFMIN, &dst, &src); + } + //! @brief Packed SP-FP Minimum (3dNow!). 
+ inline void pfmin(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFMIN, &dst, &src); + } + + //! @brief Packed SP-FP Multiply (3dNow!). + inline void pfmul(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFMUL, &dst, &src); + } + //! @brief Packed SP-FP Multiply (3dNow!). + inline void pfmul(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFMUL, &dst, &src); + } + + //! @brief Packed SP-FP Negative Accumulate (3dNow!). + inline void pfnacc(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFNACC, &dst, &src); + } + //! @brief Packed SP-FP Negative Accumulate (3dNow!). + inline void pfnacc(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFNACC, &dst, &src); + } + + //! @brief Packed SP-FP Mixed Accumulate (3dNow!). + inline void pfpnaxx(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFPNACC, &dst, &src); + } + //! @brief Packed SP-FP Mixed Accumulate (3dNow!). + inline void pfpnacc(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFPNACC, &dst, &src); + } + + //! @brief Packed SP-FP Reciprocal Approximation (3dNow!). + inline void pfrcp(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFRCP, &dst, &src); + } + //! @brief Packed SP-FP Reciprocal Approximation (3dNow!). + inline void pfrcp(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFRCP, &dst, &src); + } + + //! @brief Packed SP-FP Reciprocal, First Iteration Step (3dNow!). + inline void pfrcpit1(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFRCPIT1, &dst, &src); + } + //! @brief Packed SP-FP Reciprocal, First Iteration Step (3dNow!). + inline void pfrcpit1(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFRCPIT1, &dst, &src); + } + + //! @brief Packed SP-FP Reciprocal, Second Iteration Step (3dNow!). + inline void pfrcpit2(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFRCPIT2, &dst, &src); + } + //! 
@brief Packed SP-FP Reciprocal, Second Iteration Step (3dNow!). + inline void pfrcpit2(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFRCPIT2, &dst, &src); + } + + //! @brief Packed SP-FP Reciprocal Square Root, First Iteration Step (3dNow!). + inline void pfrsqit1(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFRSQIT1, &dst, &src); + } + //! @brief Packed SP-FP Reciprocal Square Root, First Iteration Step (3dNow!). + inline void pfrsqit1(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFRSQIT1, &dst, &src); + } + + //! @brief Packed SP-FP Reciprocal Square Root Approximation (3dNow!). + inline void pfrsqrt(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFRSQRT, &dst, &src); + } + //! @brief Packed SP-FP Reciprocal Square Root Approximation (3dNow!). + inline void pfrsqrt(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFRSQRT, &dst, &src); + } + + //! @brief Packed SP-FP Subtract (3dNow!). + inline void pfsub(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFSUB, &dst, &src); + } + //! @brief Packed SP-FP Subtract (3dNow!). + inline void pfsub(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFSUB, &dst, &src); + } + + //! @brief Packed SP-FP Reverse Subtract (3dNow!). + inline void pfsubr(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PFSUBR, &dst, &src); + } + //! @brief Packed SP-FP Reverse Subtract (3dNow!). + inline void pfsubr(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PFSUBR, &dst, &src); + } + + //! @brief Packed DWords to SP-FP (3dNow!). + inline void pi2fd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PI2FD, &dst, &src); + } + //! @brief Packed DWords to SP-FP (3dNow!). + inline void pi2fd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PI2FD, &dst, &src); + } + + //! @brief Packed Words to SP-FP (3dNow!). 
+ inline void pi2fw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PI2FW, &dst, &src); + } + //! @brief Packed Words to SP-FP (3dNow!). + inline void pi2fw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PI2FW, &dst, &src); + } + + //! @brief Packed swap DWord (3dNow!) + inline void pswapd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSWAPD, &dst, &src); + } + //! @brief Packed swap DWord (3dNow!) + inline void pswapd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSWAPD, &dst, &src); + } + + // -------------------------------------------------------------------------- + // [SSE] + // -------------------------------------------------------------------------- + + //! @brief Packed SP-FP Add (SSE). + inline void addps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_ADDPS, &dst, &src); + } + //! @brief Packed SP-FP Add (SSE). + inline void addps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_ADDPS, &dst, &src); + } + + //! @brief Scalar SP-FP Add (SSE). + inline void addss(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_ADDSS, &dst, &src); + } + //! @brief Scalar SP-FP Add (SSE). + inline void addss(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_ADDSS, &dst, &src); + } + + //! @brief Bit-wise Logical And Not For SP-FP (SSE). + inline void andnps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_ANDNPS, &dst, &src); + } + //! @brief Bit-wise Logical And Not For SP-FP (SSE). + inline void andnps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_ANDNPS, &dst, &src); + } + + //! @brief Bit-wise Logical And For SP-FP (SSE). + inline void andps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_ANDPS, &dst, &src); + } + //! @brief Bit-wise Logical And For SP-FP (SSE). + inline void andps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_ANDPS, &dst, &src); + } + + //! 
@brief Packed SP-FP Compare (SSE). + inline void cmpps(const XMMVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_CMPPS, &dst, &src, &imm8); + } + //! @brief Packed SP-FP Compare (SSE). + inline void cmpps(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_CMPPS, &dst, &src, &imm8); + } + + //! @brief Compare Scalar SP-FP Values (SSE). + inline void cmpss(const XMMVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_CMPSS, &dst, &src, &imm8); + } + //! @brief Compare Scalar SP-FP Values (SSE). + inline void cmpss(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_CMPSS, &dst, &src, &imm8); + } + + //! @brief Scalar Ordered SP-FP Compare and Set EFLAGS (SSE). + inline void comiss(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_COMISS, &dst, &src); + } + //! @brief Scalar Ordered SP-FP Compare and Set EFLAGS (SSE). + inline void comiss(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_COMISS, &dst, &src); + } + + //! @brief Packed Signed INT32 to Packed SP-FP Conversion (SSE). + inline void cvtpi2ps(const XMMVar& dst, const MMVar& src) + { + _emitInstruction(INST_CVTPI2PS, &dst, &src); + } + //! @brief Packed Signed INT32 to Packed SP-FP Conversion (SSE). + inline void cvtpi2ps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_CVTPI2PS, &dst, &src); + } + + //! @brief Packed SP-FP to Packed INT32 Conversion (SSE). + inline void cvtps2pi(const MMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_CVTPS2PI, &dst, &src); + } + //! @brief Packed SP-FP to Packed INT32 Conversion (SSE). + inline void cvtps2pi(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_CVTPS2PI, &dst, &src); + } + + //! @brief Scalar Signed INT32 to SP-FP Conversion (SSE). + inline void cvtsi2ss(const XMMVar& dst, const GPVar& src) + { + _emitInstruction(INST_CVTSI2SS, &dst, &src); + } + //! 
@brief Scalar Signed INT32 to SP-FP Conversion (SSE). + inline void cvtsi2ss(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_CVTSI2SS, &dst, &src); + } + + //! @brief Scalar SP-FP to Signed INT32 Conversion (SSE). + inline void cvtss2si(const GPVar& dst, const XMMVar& src) + { + _emitInstruction(INST_CVTSS2SI, &dst, &src); + } + //! @brief Scalar SP-FP to Signed INT32 Conversion (SSE). + inline void cvtss2si(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_CVTSS2SI, &dst, &src); + } + + //! @brief Packed SP-FP to Packed INT32 Conversion (truncate) (SSE). + inline void cvttps2pi(const MMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_CVTTPS2PI, &dst, &src); + } + //! @brief Packed SP-FP to Packed INT32 Conversion (truncate) (SSE). + inline void cvttps2pi(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_CVTTPS2PI, &dst, &src); + } + + //! @brief Scalar SP-FP to Signed INT32 Conversion (truncate) (SSE). + inline void cvttss2si(const GPVar& dst, const XMMVar& src) + { + _emitInstruction(INST_CVTTSS2SI, &dst, &src); + } + //! @brief Scalar SP-FP to Signed INT32 Conversion (truncate) (SSE). + inline void cvttss2si(const GPVar& dst, const Mem& src) + { + _emitInstruction(INST_CVTTSS2SI, &dst, &src); + } + + //! @brief Packed SP-FP Divide (SSE). + inline void divps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_DIVPS, &dst, &src); + } + //! @brief Packed SP-FP Divide (SSE). + inline void divps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_DIVPS, &dst, &src); + } + + //! @brief Scalar SP-FP Divide (SSE). + inline void divss(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_DIVSS, &dst, &src); + } + //! @brief Scalar SP-FP Divide (SSE). + inline void divss(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_DIVSS, &dst, &src); + } + + //! @brief Load Streaming SIMD Extension Control/Status (SSE). 
+ inline void ldmxcsr(const Mem& src) + { + _emitInstruction(INST_LDMXCSR, &src); + } + + //! @brief Byte Mask Write (SSE). + //! + //! @note The default memory location is specified by DS:EDI. + inline void maskmovq(const GPVar& dst_ptr, const MMVar& data, const MMVar& mask) + { + _emitInstruction(INST_MASKMOVQ, &dst_ptr, &data, &mask); + } + + //! @brief Packed SP-FP Maximum (SSE). + inline void maxps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MAXPS, &dst, &src); + } + //! @brief Packed SP-FP Maximum (SSE). + inline void maxps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MAXPS, &dst, &src); + } + + //! @brief Scalar SP-FP Maximum (SSE). + inline void maxss(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MAXSS, &dst, &src); + } + //! @brief Scalar SP-FP Maximum (SSE). + inline void maxss(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MAXSS, &dst, &src); + } + + //! @brief Packed SP-FP Minimum (SSE). + inline void minps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MINPS, &dst, &src); + } + //! @brief Packed SP-FP Minimum (SSE). + inline void minps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MINPS, &dst, &src); + } + + //! @brief Scalar SP-FP Minimum (SSE). + inline void minss(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MINSS, &dst, &src); + } + //! @brief Scalar SP-FP Minimum (SSE). + inline void minss(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MINSS, &dst, &src); + } + + //! @brief Move Aligned Packed SP-FP Values (SSE). + inline void movaps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVAPS, &dst, &src); + } + //! @brief Move Aligned Packed SP-FP Values (SSE). + inline void movaps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVAPS, &dst, &src); + } + + //! @brief Move Aligned Packed SP-FP Values (SSE). 
+ inline void movaps(const Mem& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVAPS, &dst, &src); + } + + //! @brief Move DWord. + inline void movd(const Mem& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + //! @brief Move DWord. + inline void movd(const GPVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + //! @brief Move DWord. + inline void movd(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + //! @brief Move DWord. + inline void movd(const XMMVar& dst, const GPVar& src) + { + _emitInstruction(INST_MOVD, &dst, &src); + } + + //! @brief Move QWord (SSE). + inline void movq(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } + //! @brief Move QWord (SSE). + inline void movq(const Mem& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#if defined(ASMJIT_X64) + //! @brief Move QWord (SSE). + inline void movq(const GPVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#endif // ASMJIT_X64 + //! @brief Move QWord (SSE). + inline void movq(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#if defined(ASMJIT_X64) + //! @brief Move QWord (SSE). + inline void movq(const XMMVar& dst, const GPVar& src) + { + _emitInstruction(INST_MOVQ, &dst, &src); + } +#endif // ASMJIT_X64 + + //! @brief Move 64 Bits Non Temporal (SSE). + inline void movntq(const Mem& dst, const MMVar& src) + { + _emitInstruction(INST_MOVNTQ, &dst, &src); + } + + //! @brief High to Low Packed SP-FP (SSE). + inline void movhlps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVHLPS, &dst, &src); + } + + //! @brief Move High Packed SP-FP (SSE). + inline void movhps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVHPS, &dst, &src); + } + + //! @brief Move High Packed SP-FP (SSE). 
+ inline void movhps(const Mem& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVHPS, &dst, &src); + } + + //! @brief Move Low to High Packed SP-FP (SSE). + inline void movlhps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVLHPS, &dst, &src); + } + + //! @brief Move Low Packed SP-FP (SSE). + inline void movlps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVLPS, &dst, &src); + } + + //! @brief Move Low Packed SP-FP (SSE). + inline void movlps(const Mem& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVLPS, &dst, &src); + } + + //! @brief Move Aligned Four Packed SP-FP Non Temporal (SSE). + inline void movntps(const Mem& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVNTPS, &dst, &src); + } + + //! @brief Move Scalar SP-FP (SSE). + inline void movss(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVSS, &dst, &src); + } + + //! @brief Move Scalar SP-FP (SSE). + inline void movss(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVSS, &dst, &src); + } + + //! @brief Move Scalar SP-FP (SSE). + inline void movss(const Mem& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVSS, &dst, &src); + } + + //! @brief Move Unaligned Packed SP-FP Values (SSE). + inline void movups(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVUPS, &dst, &src); + } + //! @brief Move Unaligned Packed SP-FP Values (SSE). + inline void movups(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVUPS, &dst, &src); + } + + //! @brief Move Unaligned Packed SP-FP Values (SSE). + inline void movups(const Mem& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVUPS, &dst, &src); + } + + //! @brief Packed SP-FP Multiply (SSE). + inline void mulps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MULPS, &dst, &src); + } + //! @brief Packed SP-FP Multiply (SSE). 
+ inline void mulps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MULPS, &dst, &src); + } + + //! @brief Scalar SP-FP Multiply (SSE). + inline void mulss(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MULSS, &dst, &src); + } + //! @brief Scalar SP-FP Multiply (SSE). + inline void mulss(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MULSS, &dst, &src); + } + + //! @brief Bit-wise Logical OR for SP-FP Data (SSE). + inline void orps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_ORPS, &dst, &src); + } + //! @brief Bit-wise Logical OR for SP-FP Data (SSE). + inline void orps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_ORPS, &dst, &src); + } + + //! @brief Packed Average (SSE). + inline void pavgb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PAVGB, &dst, &src); + } + //! @brief Packed Average (SSE). + inline void pavgb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PAVGB, &dst, &src); + } + + //! @brief Packed Average (SSE). + inline void pavgw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PAVGW, &dst, &src); + } + //! @brief Packed Average (SSE). + inline void pavgw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PAVGW, &dst, &src); + } + + //! @brief Extract Word (SSE). + inline void pextrw(const GPVar& dst, const MMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRW, &dst, &src, &imm8); + } + + //! @brief Insert Word (SSE). + inline void pinsrw(const MMVar& dst, const GPVar& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRW, &dst, &src, &imm8); + } + //! @brief Insert Word (SSE). + inline void pinsrw(const MMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRW, &dst, &src, &imm8); + } + + //! @brief Packed Signed Integer Word Maximum (SSE). + inline void pmaxsw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PMAXSW, &dst, &src); + } + //! 
@brief Packed Signed Integer Word Maximum (SSE). + inline void pmaxsw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMAXSW, &dst, &src); + } + + //! @brief Packed Unsigned Integer Byte Maximum (SSE). + inline void pmaxub(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PMAXUB, &dst, &src); + } + //! @brief Packed Unsigned Integer Byte Maximum (SSE). + inline void pmaxub(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMAXUB, &dst, &src); + } + + //! @brief Packed Signed Integer Word Minimum (SSE). + inline void pminsw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PMINSW, &dst, &src); + } + //! @brief Packed Signed Integer Word Minimum (SSE). + inline void pminsw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMINSW, &dst, &src); + } + + //! @brief Packed Unsigned Integer Byte Minimum (SSE). + inline void pminub(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PMINUB, &dst, &src); + } + //! @brief Packed Unsigned Integer Byte Minimum (SSE). + inline void pminub(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMINUB, &dst, &src); + } + + //! @brief Move Byte Mask To Integer (SSE). + inline void pmovmskb(const GPVar& dst, const MMVar& src) + { + _emitInstruction(INST_PMOVMSKB, &dst, &src); + } + + //! @brief Packed Multiply High Unsigned (SSE). + inline void pmulhuw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PMULHUW, &dst, &src); + } + //! @brief Packed Multiply High Unsigned (SSE). + inline void pmulhuw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMULHUW, &dst, &src); + } + + //! @brief Packed Sum of Absolute Differences (SSE). + inline void psadbw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSADBW, &dst, &src); + } + //! @brief Packed Sum of Absolute Differences (SSE). + inline void psadbw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSADBW, &dst, &src); + } + + //! 
@brief Packed Shuffle word (SSE). + inline void pshufw(const MMVar& dst, const MMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PSHUFW, &dst, &src, &imm8); + } + //! @brief Packed Shuffle word (SSE). + inline void pshufw(const MMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PSHUFW, &dst, &src, &imm8); + } + + //! @brief Packed SP-FP Reciprocal (SSE). + inline void rcpps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_RCPPS, &dst, &src); + } + //! @brief Packed SP-FP Reciprocal (SSE). + inline void rcpps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_RCPPS, &dst, &src); + } + + //! @brief Scalar SP-FP Reciprocal (SSE). + inline void rcpss(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_RCPSS, &dst, &src); + } + //! @brief Scalar SP-FP Reciprocal (SSE). + inline void rcpss(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_RCPSS, &dst, &src); + } + + //! @brief Prefetch (SSE). + inline void prefetch(const Mem& mem, const Imm& hint) + { + _emitInstruction(INST_PREFETCH, &mem, &hint); + } + + //! @brief Compute Sum of Absolute Differences (SSE). + inline void psadbw(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PSADBW, &dst, &src); + } + //! @brief Compute Sum of Absolute Differences (SSE). + inline void psadbw(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSADBW, &dst, &src); + } + + //! @brief Packed SP-FP Square Root Reciprocal (SSE). + inline void rsqrtps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_RSQRTPS, &dst, &src); + } + //! @brief Packed SP-FP Square Root Reciprocal (SSE). + inline void rsqrtps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_RSQRTPS, &dst, &src); + } + + //! @brief Scalar SP-FP Square Root Reciprocal (SSE). + inline void rsqrtss(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_RSQRTSS, &dst, &src); + } + //! @brief Scalar SP-FP Square Root Reciprocal (SSE). 
+ inline void rsqrtss(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_RSQRTSS, &dst, &src); + } + + //! @brief Store fence (SSE). + inline void sfence() + { + _emitInstruction(INST_SFENCE); + } + + //! @brief Shuffle SP-FP (SSE). + inline void shufps(const XMMVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_SHUFPS, &dst, &src, &imm8); + } + //! @brief Shuffle SP-FP (SSE). + inline void shufps(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_SHUFPS, &dst, &src, &imm8); + } + + //! @brief Packed SP-FP Square Root (SSE). + inline void sqrtps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_SQRTPS, &dst, &src); + } + //! @brief Packed SP-FP Square Root (SSE). + inline void sqrtps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_SQRTPS, &dst, &src); + } + + //! @brief Scalar SP-FP Square Root (SSE). + inline void sqrtss(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_SQRTSS, &dst, &src); + } + //! @brief Scalar SP-FP Square Root (SSE). + inline void sqrtss(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_SQRTSS, &dst, &src); + } + + //! @brief Store Streaming SIMD Extension Control/Status (SSE). + inline void stmxcsr(const Mem& dst) + { + _emitInstruction(INST_STMXCSR, &dst); + } + + //! @brief Packed SP-FP Subtract (SSE). + inline void subps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_SUBPS, &dst, &src); + } + //! @brief Packed SP-FP Subtract (SSE). + inline void subps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_SUBPS, &dst, &src); + } + + //! @brief Scalar SP-FP Subtract (SSE). + inline void subss(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_SUBSS, &dst, &src); + } + //! @brief Scalar SP-FP Subtract (SSE). + inline void subss(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_SUBSS, &dst, &src); + } + + //! 
//! @brief Unordered Scalar SP-FP compare and set EFLAGS (SSE).
  inline void ucomiss(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_UCOMISS, &dst, &src);
  }
  //! @brief Unordered Scalar SP-FP compare and set EFLAGS (SSE).
  inline void ucomiss(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_UCOMISS, &dst, &src);
  }

  //! @brief Unpack High Packed SP-FP Data (SSE).
  inline void unpckhps(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_UNPCKHPS, &dst, &src);
  }
  //! @brief Unpack High Packed SP-FP Data (SSE).
  inline void unpckhps(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_UNPCKHPS, &dst, &src);
  }

  //! @brief Unpack Low Packed SP-FP Data (SSE).
  inline void unpcklps(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_UNPCKLPS, &dst, &src);
  }
  //! @brief Unpack Low Packed SP-FP Data (SSE).
  inline void unpcklps(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_UNPCKLPS, &dst, &src);
  }

  //! @brief Bit-wise Logical Xor for SP-FP Data (SSE).
  inline void xorps(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_XORPS, &dst, &src);
  }
  //! @brief Bit-wise Logical Xor for SP-FP Data (SSE).
  inline void xorps(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_XORPS, &dst, &src);
  }

  // --------------------------------------------------------------------------
  // [SSE2]
  // --------------------------------------------------------------------------

  //! @brief Packed DP-FP Add (SSE2).
  inline void addpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_ADDPD, &dst, &src);
  }
  //! @brief Packed DP-FP Add (SSE2).
  inline void addpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_ADDPD, &dst, &src);
  }

  //! @brief Scalar DP-FP Add (SSE2).
  inline void addsd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_ADDSD, &dst, &src);
  }
  //! @brief Scalar DP-FP Add (SSE2).
  inline void addsd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_ADDSD, &dst, &src);
  }

  //! @brief Bit-wise Logical And Not For DP-FP (SSE2).
  inline void andnpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_ANDNPD, &dst, &src);
  }
  //! @brief Bit-wise Logical And Not For DP-FP (SSE2).
  inline void andnpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_ANDNPD, &dst, &src);
  }

  //! @brief Bit-wise Logical And For DP-FP (SSE2).
  inline void andpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_ANDPD, &dst, &src);
  }
  //! @brief Bit-wise Logical And For DP-FP (SSE2).
  inline void andpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_ANDPD, &dst, &src);
  }

  //! @brief Flush Cache Line (SSE2).
  inline void clflush(const Mem& mem)
  {
    _emitInstruction(INST_CLFLUSH, &mem);
  }

  //! @brief Packed DP-FP Compare (SSE2).
  inline void cmppd(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_CMPPD, &dst, &src, &imm8);
  }
  //! @brief Packed DP-FP Compare (SSE2).
  inline void cmppd(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_CMPPD, &dst, &src, &imm8);
  }

  //! @brief Compare Scalar DP-FP Values (SSE2).
  inline void cmpsd(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_CMPSD, &dst, &src, &imm8);
  }
  //! @brief Compare Scalar DP-FP Values (SSE2).
  inline void cmpsd(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_CMPSD, &dst, &src, &imm8);
  }

  //! @brief Scalar Ordered DP-FP Compare and Set EFLAGS (SSE2).
  inline void comisd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_COMISD, &dst, &src);
  }
  //! @brief Scalar Ordered DP-FP Compare and Set EFLAGS (SSE2).
  inline void comisd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_COMISD, &dst, &src);
  }

  //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
  inline void cvtdq2pd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTDQ2PD, &dst, &src);
  }
  //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
  inline void cvtdq2pd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTDQ2PD, &dst, &src);
  }

  //! @brief Convert Packed Dword Integers to Packed SP-FP Values (SSE2).
  inline void cvtdq2ps(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTDQ2PS, &dst, &src);
  }
  //! @brief Convert Packed Dword Integers to Packed SP-FP Values (SSE2).
  inline void cvtdq2ps(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTDQ2PS, &dst, &src);
  }

  //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
  inline void cvtpd2dq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTPD2DQ, &dst, &src);
  }
  //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
  inline void cvtpd2dq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTPD2DQ, &dst, &src);
  }

  //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
  inline void cvtpd2pi(const MMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTPD2PI, &dst, &src);
  }
  //! @brief Convert Packed DP-FP Values to Packed Dword Integers (SSE2).
  inline void cvtpd2pi(const MMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTPD2PI, &dst, &src);
  }

  //! @brief Convert Packed DP-FP Values to Packed SP-FP Values (SSE2).
  inline void cvtpd2ps(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTPD2PS, &dst, &src);
  }
  //! @brief Convert Packed DP-FP Values to Packed SP-FP Values (SSE2).
  inline void cvtpd2ps(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTPD2PS, &dst, &src);
  }

  //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
  inline void cvtpi2pd(const XMMVar& dst, const MMVar& src)
  {
    _emitInstruction(INST_CVTPI2PD, &dst, &src);
  }
  //! @brief Convert Packed Dword Integers to Packed DP-FP Values (SSE2).
  inline void cvtpi2pd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTPI2PD, &dst, &src);
  }

  //! @brief Convert Packed SP-FP Values to Packed Dword Integers (SSE2).
  inline void cvtps2dq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTPS2DQ, &dst, &src);
  }
  //! @brief Convert Packed SP-FP Values to Packed Dword Integers (SSE2).
  inline void cvtps2dq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTPS2DQ, &dst, &src);
  }

  //! @brief Convert Packed SP-FP Values to Packed DP-FP Values (SSE2).
  inline void cvtps2pd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTPS2PD, &dst, &src);
  }
  //! @brief Convert Packed SP-FP Values to Packed DP-FP Values (SSE2).
  inline void cvtps2pd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTPS2PD, &dst, &src);
  }

  //! @brief Convert Scalar DP-FP Value to Dword Integer (SSE2).
  inline void cvtsd2si(const GPVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTSD2SI, &dst, &src);
  }
  //! @brief Convert Scalar DP-FP Value to Dword Integer (SSE2).
  inline void cvtsd2si(const GPVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTSD2SI, &dst, &src);
  }

  //! @brief Convert Scalar DP-FP Value to Scalar SP-FP Value (SSE2).
  inline void cvtsd2ss(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTSD2SS, &dst, &src);
  }
  //! @brief Convert Scalar DP-FP Value to Scalar SP-FP Value (SSE2).
  inline void cvtsd2ss(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTSD2SS, &dst, &src);
  }

  //! @brief Convert Dword Integer to Scalar DP-FP Value (SSE2).
  inline void cvtsi2sd(const XMMVar& dst, const GPVar& src)
  {
    _emitInstruction(INST_CVTSI2SD, &dst, &src);
  }
  //! @brief Convert Dword Integer to Scalar DP-FP Value (SSE2).
  inline void cvtsi2sd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTSI2SD, &dst, &src);
  }

  //! @brief Convert Scalar SP-FP Value to Scalar DP-FP Value (SSE2).
  inline void cvtss2sd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTSS2SD, &dst, &src);
  }
  //! @brief Convert Scalar SP-FP Value to Scalar DP-FP Value (SSE2).
  inline void cvtss2sd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTSS2SD, &dst, &src);
  }

  //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2).
  inline void cvttpd2pi(const MMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTTPD2PI, &dst, &src);
  }
  //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2).
  inline void cvttpd2pi(const MMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTTPD2PI, &dst, &src);
  }

  //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2).
  inline void cvttpd2dq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTTPD2DQ, &dst, &src);
  }
  //! @brief Convert with Truncation Packed DP-FP Values to Packed Dword Integers (SSE2).
  inline void cvttpd2dq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTTPD2DQ, &dst, &src);
  }

  //! @brief Convert with Truncation Packed SP-FP Values to Packed Dword Integers (SSE2).
  inline void cvttps2dq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTTPS2DQ, &dst, &src);
  }
  //! @brief Convert with Truncation Packed SP-FP Values to Packed Dword Integers (SSE2).
  inline void cvttps2dq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTTPS2DQ, &dst, &src);
  }

  //! @brief Convert with Truncation Scalar DP-FP Value to Signed Dword Integer (SSE2).
  inline void cvttsd2si(const GPVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_CVTTSD2SI, &dst, &src);
  }
  //! @brief Convert with Truncation Scalar DP-FP Value to Signed Dword Integer (SSE2).
  inline void cvttsd2si(const GPVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CVTTSD2SI, &dst, &src);
  }

  //! @brief Packed DP-FP Divide (SSE2).
  inline void divpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_DIVPD, &dst, &src);
  }
  //! @brief Packed DP-FP Divide (SSE2).
  inline void divpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_DIVPD, &dst, &src);
  }

  //! @brief Scalar DP-FP Divide (SSE2).
  inline void divsd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_DIVSD, &dst, &src);
  }
  //! @brief Scalar DP-FP Divide (SSE2).
  inline void divsd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_DIVSD, &dst, &src);
  }

  //! @brief Load Fence (SSE2).
  inline void lfence()
  {
    _emitInstruction(INST_LFENCE);
  }

  //! @brief Store Selected Bytes of Double Quadword (SSE2).
  //!
  //! @note Target is DS:EDI.
  inline void maskmovdqu(const GPVar& dst_ptr, const XMMVar& src, const XMMVar& mask)
  {
    _emitInstruction(INST_MASKMOVDQU, &dst_ptr, &src, &mask);
  }

  //! @brief Return Maximum Packed Double-Precision FP Values (SSE2).
  inline void maxpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MAXPD, &dst, &src);
  }
  //! @brief Return Maximum Packed Double-Precision FP Values (SSE2).
  inline void maxpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MAXPD, &dst, &src);
  }

  //! @brief Return Maximum Scalar Double-Precision FP Value (SSE2).
  inline void maxsd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MAXSD, &dst, &src);
  }
  //! @brief Return Maximum Scalar Double-Precision FP Value (SSE2).
  inline void maxsd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MAXSD, &dst, &src);
  }

  //! @brief Memory Fence (SSE2).
  inline void mfence()
  {
    _emitInstruction(INST_MFENCE);
  }

  //! @brief Return Minimum Packed DP-FP Values (SSE2).
  inline void minpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MINPD, &dst, &src);
  }
  //! @brief Return Minimum Packed DP-FP Values (SSE2).
  inline void minpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MINPD, &dst, &src);
  }

  //! @brief Return Minimum Scalar DP-FP Value (SSE2).
  inline void minsd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MINSD, &dst, &src);
  }
  //! @brief Return Minimum Scalar DP-FP Value (SSE2).
  inline void minsd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MINSD, &dst, &src);
  }

  //! @brief Move Aligned DQWord (SSE2).
  inline void movdqa(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVDQA, &dst, &src);
  }
  //! @brief Move Aligned DQWord (SSE2).
  inline void movdqa(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MOVDQA, &dst, &src);
  }

  //! @brief Move Aligned DQWord (SSE2).
  inline void movdqa(const Mem& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVDQA, &dst, &src);
  }

  //! @brief Move Unaligned Double Quadword (SSE2).
  inline void movdqu(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVDQU, &dst, &src);
  }
  //! @brief Move Unaligned Double Quadword (SSE2).
  inline void movdqu(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MOVDQU, &dst, &src);
  }

  //! @brief Move Unaligned Double Quadword (SSE2).
  inline void movdqu(const Mem& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVDQU, &dst, &src);
  }

  //! @brief Extract Packed SP-FP Sign Mask (SSE).
  inline void movmskps(const GPVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVMSKPS, &dst, &src);
  }

  //! @brief Extract Packed DP-FP Sign Mask (SSE2).
  inline void movmskpd(const GPVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVMSKPD, &dst, &src);
  }

  //! @brief Move Scalar Double-Precision FP Value (SSE2).
  inline void movsd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVSD, &dst, &src);
  }
  //! @brief Move Scalar Double-Precision FP Value (SSE2).
  inline void movsd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MOVSD, &dst, &src);
  }

  //! @brief Move Scalar Double-Precision FP Value (SSE2).
  inline void movsd(const Mem& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVSD, &dst, &src);
  }

  //! @brief Move Aligned Packed Double-Precision FP Values (SSE2).
  inline void movapd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVAPD, &dst, &src);
  }

  //! @brief Move Aligned Packed Double-Precision FP Values (SSE2).
  inline void movapd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MOVAPD, &dst, &src);
  }

  //! @brief Move Aligned Packed Double-Precision FP Values (SSE2).
  inline void movapd(const Mem& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVAPD, &dst, &src);
  }

  //! @brief Move Quadword from XMM to MMX Technology Register (SSE2).
  inline void movdq2q(const MMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVDQ2Q, &dst, &src);
  }

  //! @brief Move Quadword from MMX Technology to XMM Register (SSE2).
  inline void movq2dq(const XMMVar& dst, const MMVar& src)
  {
    _emitInstruction(INST_MOVQ2DQ, &dst, &src);
  }

  //! @brief Move High Packed Double-Precision FP Value (SSE2).
  inline void movhpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MOVHPD, &dst, &src);
  }

  //! @brief Move High Packed Double-Precision FP Value (SSE2).
  inline void movhpd(const Mem& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVHPD, &dst, &src);
  }

  //! @brief Move Low Packed Double-Precision FP Value (SSE2).
  inline void movlpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MOVLPD, &dst, &src);
  }

  //! @brief Move Low Packed Double-Precision FP Value (SSE2).
  inline void movlpd(const Mem& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVLPD, &dst, &src);
  }

  //! @brief Store Double Quadword Using Non-Temporal Hint (SSE2).
  inline void movntdq(const Mem& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVNTDQ, &dst, &src);
  }

  //! @brief Store DWORD Using Non-Temporal Hint (SSE2).
  inline void movnti(const Mem& dst, const GPVar& src)
  {
    _emitInstruction(INST_MOVNTI, &dst, &src);
  }

  //! @brief Store Packed Double-Precision FP Values Using Non-Temporal Hint (SSE2).
  inline void movntpd(const Mem& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVNTPD, &dst, &src);
  }

  //! @brief Move Unaligned Packed Double-Precision FP Values (SSE2).
  inline void movupd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MOVUPD, &dst, &src);
  }

  //! @brief Move Unaligned Packed Double-Precision FP Values (SSE2).
  inline void movupd(const Mem& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MOVUPD, &dst, &src);
  }

  //! @brief Packed DP-FP Multiply (SSE2).
  inline void mulpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MULPD, &dst, &src);
  }
  //! @brief Packed DP-FP Multiply (SSE2).
  inline void mulpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MULPD, &dst, &src);
  }

  //! @brief Scalar DP-FP Multiply (SSE2).
  inline void mulsd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_MULSD, &dst, &src);
  }
  //! @brief Scalar DP-FP Multiply (SSE2).
  inline void mulsd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_MULSD, &dst, &src);
  }

  //! @brief Bit-wise Logical OR for DP-FP Data (SSE2).
  inline void orpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_ORPD, &dst, &src);
  }
  //! @brief Bit-wise Logical OR for DP-FP Data (SSE2).
  inline void orpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_ORPD, &dst, &src);
  }

  //! @brief Pack with Signed Saturation (SSE2).
  inline void packsswb(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PACKSSWB, &dst, &src);
  }
  //! @brief Pack with Signed Saturation (SSE2).
  inline void packsswb(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PACKSSWB, &dst, &src);
  }

  //! @brief Pack with Signed Saturation (SSE2).
  inline void packssdw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PACKSSDW, &dst, &src);
  }
  //! @brief Pack with Signed Saturation (SSE2).
  inline void packssdw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PACKSSDW, &dst, &src);
  }

  //! @brief Pack with Unsigned Saturation (SSE2).
  inline void packuswb(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PACKUSWB, &dst, &src);
  }
  //! @brief Pack with Unsigned Saturation (SSE2).
  inline void packuswb(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PACKUSWB, &dst, &src);
  }

  //! @brief Packed BYTE Add (SSE2).
  inline void paddb(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PADDB, &dst, &src);
  }
  //! @brief Packed BYTE Add (SSE2).
  inline void paddb(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PADDB, &dst, &src);
  }

  //! @brief Packed WORD Add (SSE2).
  inline void paddw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PADDW, &dst, &src);
  }
  //! @brief Packed WORD Add (SSE2).
  inline void paddw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PADDW, &dst, &src);
  }

  //! @brief Packed DWORD Add (SSE2).
  inline void paddd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PADDD, &dst, &src);
  }
  //! @brief Packed DWORD Add (SSE2).
  inline void paddd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PADDD, &dst, &src);
  }

  //! @brief Packed QWORD Add (SSE2).
  inline void paddq(const MMVar& dst, const MMVar& src)
  {
    _emitInstruction(INST_PADDQ, &dst, &src);
  }
  //! @brief Packed QWORD Add (SSE2).
  inline void paddq(const MMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PADDQ, &dst, &src);
  }

  //! @brief Packed QWORD Add (SSE2).
  inline void paddq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PADDQ, &dst, &src);
  }
  //! @brief Packed QWORD Add (SSE2).
  inline void paddq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PADDQ, &dst, &src);
  }

  //! @brief Packed Add with Saturation (SSE2).
  inline void paddsb(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PADDSB, &dst, &src);
  }
  //! @brief Packed Add with Saturation (SSE2).
  inline void paddsb(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PADDSB, &dst, &src);
  }

  //! @brief Packed Add with Saturation (SSE2).
  inline void paddsw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PADDSW, &dst, &src);
  }
  //! @brief Packed Add with Saturation (SSE2).
  inline void paddsw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PADDSW, &dst, &src);
  }

  //! @brief Packed Add Unsigned with Saturation (SSE2).
  inline void paddusb(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PADDUSB, &dst, &src);
  }
  //! @brief Packed Add Unsigned with Saturation (SSE2).
  inline void paddusb(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PADDUSB, &dst, &src);
  }

  //! @brief Packed Add Unsigned with Saturation (SSE2).
  inline void paddusw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PADDUSW, &dst, &src);
  }
  //! @brief Packed Add Unsigned with Saturation (SSE2).
  inline void paddusw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PADDUSW, &dst, &src);
  }

  //! @brief Logical AND (SSE2).
  inline void pand(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PAND, &dst, &src);
  }
  //! @brief Logical AND (SSE2).
  inline void pand(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PAND, &dst, &src);
  }

  //! @brief Logical AND Not (SSE2).
  inline void pandn(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PANDN, &dst, &src);
  }
  //! @brief Logical AND Not (SSE2).
  inline void pandn(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PANDN, &dst, &src);
  }

  //! @brief Spin Loop Hint (SSE2).
  inline void pause()
  {
    _emitInstruction(INST_PAUSE);
  }

  //! @brief Packed Average (SSE2).
  inline void pavgb(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PAVGB, &dst, &src);
  }
  //! @brief Packed Average (SSE2).
  inline void pavgb(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PAVGB, &dst, &src);
  }

  //! @brief Packed Average (SSE2).
  inline void pavgw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PAVGW, &dst, &src);
  }
  //! @brief Packed Average (SSE2).
  inline void pavgw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PAVGW, &dst, &src);
  }

  //! @brief Packed Compare for Equal (BYTES) (SSE2).
  inline void pcmpeqb(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PCMPEQB, &dst, &src);
  }
  //! @brief Packed Compare for Equal (BYTES) (SSE2).
  inline void pcmpeqb(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PCMPEQB, &dst, &src);
  }

  //! @brief Packed Compare for Equal (WORDS) (SSE2).
  inline void pcmpeqw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PCMPEQW, &dst, &src);
  }
  //! @brief Packed Compare for Equal (WORDS) (SSE2).
  inline void pcmpeqw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PCMPEQW, &dst, &src);
  }

  //! @brief Packed Compare for Equal (DWORDS) (SSE2).
  inline void pcmpeqd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PCMPEQD, &dst, &src);
  }
  //! @brief Packed Compare for Equal (DWORDS) (SSE2).
  inline void pcmpeqd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PCMPEQD, &dst, &src);
  }

  //! @brief Packed Compare for Greater Than (BYTES) (SSE2).
  inline void pcmpgtb(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PCMPGTB, &dst, &src);
  }
  //! @brief Packed Compare for Greater Than (BYTES) (SSE2).
  inline void pcmpgtb(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PCMPGTB, &dst, &src);
  }

  //! @brief Packed Compare for Greater Than (WORDS) (SSE2).
  inline void pcmpgtw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PCMPGTW, &dst, &src);
  }
  //! @brief Packed Compare for Greater Than (WORDS) (SSE2).
  inline void pcmpgtw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PCMPGTW, &dst, &src);
  }

  //! @brief Packed Compare for Greater Than (DWORDS) (SSE2).
  inline void pcmpgtd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PCMPGTD, &dst, &src);
  }
  //! @brief Packed Compare for Greater Than (DWORDS) (SSE2).
  inline void pcmpgtd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PCMPGTD, &dst, &src);
  }

  //! @brief Packed Signed Integer Word Maximum (SSE2).
  inline void pmaxsw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMAXSW, &dst, &src);
  }
  //! @brief Packed Signed Integer Word Maximum (SSE2).
  inline void pmaxsw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMAXSW, &dst, &src);
  }

  //! @brief Packed Unsigned Integer Byte Maximum (SSE2).
  inline void pmaxub(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMAXUB, &dst, &src);
  }
  //! @brief Packed Unsigned Integer Byte Maximum (SSE2).
  inline void pmaxub(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMAXUB, &dst, &src);
  }

  //! @brief Packed Signed Integer Word Minimum (SSE2).
  inline void pminsw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMINSW, &dst, &src);
  }
  //! @brief Packed Signed Integer Word Minimum (SSE2).
  inline void pminsw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMINSW, &dst, &src);
  }

  //! @brief Packed Unsigned Integer Byte Minimum (SSE2).
  inline void pminub(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMINUB, &dst, &src);
  }
  //! @brief Packed Unsigned Integer Byte Minimum (SSE2).
  inline void pminub(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMINUB, &dst, &src);
  }

  //! @brief Move Byte Mask (SSE2).
  inline void pmovmskb(const GPVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVMSKB, &dst, &src);
  }

  //! @brief Packed Multiply High (SSE2).
  inline void pmulhw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMULHW, &dst, &src);
  }
  //! @brief Packed Multiply High (SSE2).
  inline void pmulhw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMULHW, &dst, &src);
  }

  //! @brief Packed Multiply High Unsigned (SSE2).
  inline void pmulhuw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMULHUW, &dst, &src);
  }
  //! @brief Packed Multiply High Unsigned (SSE2).
  inline void pmulhuw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMULHUW, &dst, &src);
  }

  //! @brief Packed Multiply Low (SSE2).
  inline void pmullw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMULLW, &dst, &src);
  }
  //! @brief Packed Multiply Low (SSE2).
  inline void pmullw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMULLW, &dst, &src);
  }

  //! @brief Packed Multiply to QWORD (SSE2).
  inline void pmuludq(const MMVar& dst, const MMVar& src)
  {
    _emitInstruction(INST_PMULUDQ, &dst, &src);
  }
  //! @brief Packed Multiply to QWORD (SSE2).
  inline void pmuludq(const MMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMULUDQ, &dst, &src);
  }

  //! @brief Packed Multiply to QWORD (SSE2).
  inline void pmuludq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMULUDQ, &dst, &src);
  }
  //! @brief Packed Multiply to QWORD (SSE2).
  inline void pmuludq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMULUDQ, &dst, &src);
  }

  //! @brief Bitwise Logical OR (SSE2).
  inline void por(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_POR, &dst, &src);
  }
  //! @brief Bitwise Logical OR (SSE2).
  inline void por(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_POR, &dst, &src);
  }

  //! @brief Packed Shift Left Logical (SSE2).
  inline void pslld(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSLLD, &dst, &src);
  }
  //! @brief Packed Shift Left Logical (SSE2).
  inline void pslld(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSLLD, &dst, &src);
  }
  //! @brief Packed Shift Left Logical (SSE2).
  inline void pslld(const XMMVar& dst, const Imm& src)
  {
    _emitInstruction(INST_PSLLD, &dst, &src);
  }

  //! @brief Packed Shift Left Logical (SSE2).
  inline void psllq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSLLQ, &dst, &src);
  }
  //! @brief Packed Shift Left Logical (SSE2).
  inline void psllq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSLLQ, &dst, &src);
  }
  //! @brief Packed Shift Left Logical (SSE2).
  inline void psllq(const XMMVar& dst, const Imm& src)
  {
    _emitInstruction(INST_PSLLQ, &dst, &src);
  }

  //! @brief Packed Shift Left Logical (SSE2).
  inline void psllw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSLLW, &dst, &src);
  }
  //! @brief Packed Shift Left Logical (SSE2).
  inline void psllw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSLLW, &dst, &src);
  }
  //! @brief Packed Shift Left Logical (SSE2).
  inline void psllw(const XMMVar& dst, const Imm& src)
  {
    _emitInstruction(INST_PSLLW, &dst, &src);
  }

  //! @brief Packed Shift Left Logical (SSE2).
  inline void pslldq(const XMMVar& dst, const Imm& src)
  {
    _emitInstruction(INST_PSLLDQ, &dst, &src);
  }

  //! @brief Packed Shift Right Arithmetic (SSE2).
  inline void psrad(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSRAD, &dst, &src);
  }
  //! @brief Packed Shift Right Arithmetic (SSE2).
  inline void psrad(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSRAD, &dst, &src);
  }
  //! @brief Packed Shift Right Arithmetic (SSE2).
  inline void psrad(const XMMVar& dst, const Imm& src)
  {
    _emitInstruction(INST_PSRAD, &dst, &src);
  }

  //! @brief Packed Shift Right Arithmetic (SSE2).
  inline void psraw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSRAW, &dst, &src);
  }
  //! @brief Packed Shift Right Arithmetic (SSE2).
  inline void psraw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSRAW, &dst, &src);
  }
  //! @brief Packed Shift Right Arithmetic (SSE2).
  inline void psraw(const XMMVar& dst, const Imm& src)
  {
    _emitInstruction(INST_PSRAW, &dst, &src);
  }

  //! @brief Packed Subtract (SSE2).
  inline void psubb(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSUBB, &dst, &src);
  }
  //! @brief Packed Subtract (SSE2).
  inline void psubb(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSUBB, &dst, &src);
  }

  //! @brief Packed Subtract (SSE2).
  inline void psubw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSUBW, &dst, &src);
  }
  //! @brief Packed Subtract (SSE2).
  inline void psubw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSUBW, &dst, &src);
  }

  //! @brief Packed Subtract (SSE2).
  inline void psubd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSUBD, &dst, &src);
  }
  //! @brief Packed Subtract (SSE2).
  inline void psubd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSUBD, &dst, &src);
  }

  //! @brief Packed Subtract (SSE2).
  inline void psubq(const MMVar& dst, const MMVar& src)
  {
    _emitInstruction(INST_PSUBQ, &dst, &src);
  }
  //! @brief Packed Subtract (SSE2).
  inline void psubq(const MMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSUBQ, &dst, &src);
  }

  //! @brief Packed Subtract (SSE2).
  inline void psubq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSUBQ, &dst, &src);
  }
  //! @brief Packed Subtract (SSE2).
  inline void psubq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSUBQ, &dst, &src);
  }

  //! @brief Packed Multiply and Add (SSE2).
  inline void pmaddwd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMADDWD, &dst, &src);
  }
  //! @brief Packed Multiply and Add (SSE2).
  inline void pmaddwd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMADDWD, &dst, &src);
  }

  //! @brief Shuffle Packed DWORDs (SSE2).
  inline void pshufd(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_PSHUFD, &dst, &src, &imm8);
  }
  //! @brief Shuffle Packed DWORDs (SSE2).
  inline void pshufd(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_PSHUFD, &dst, &src, &imm8);
  }

  //! @brief Shuffle Packed High Words (SSE2).
  inline void pshufhw(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_PSHUFHW, &dst, &src, &imm8);
  }
  //! @brief Shuffle Packed High Words (SSE2).
  inline void pshufhw(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_PSHUFHW, &dst, &src, &imm8);
  }

  //! @brief Shuffle Packed Low Words (SSE2).
  inline void pshuflw(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_PSHUFLW, &dst, &src, &imm8);
  }
  //! @brief Shuffle Packed Low Words (SSE2).
  inline void pshuflw(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_PSHUFLW, &dst, &src, &imm8);
  }

  //! @brief Packed Shift Right Logical (SSE2).
  inline void psrld(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSRLD, &dst, &src);
  }
  //! @brief Packed Shift Right Logical (SSE2).
  inline void psrld(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSRLD, &dst, &src);
  }
  //! @brief Packed Shift Right Logical (SSE2).
  inline void psrld(const XMMVar& dst, const Imm& src)
  {
    _emitInstruction(INST_PSRLD, &dst, &src);
  }

  //! @brief Packed Shift Right Logical (SSE2).
  inline void psrlq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSRLQ, &dst, &src);
  }
  //! @brief Packed Shift Right Logical (SSE2).
  inline void psrlq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSRLQ, &dst, &src);
  }
  //! @brief Packed Shift Right Logical (SSE2).
  inline void psrlq(const XMMVar& dst, const Imm& src)
  {
    _emitInstruction(INST_PSRLQ, &dst, &src);
  }

  //! @brief DQWord Shift Right Logical (SSE2).
  inline void psrldq(const XMMVar& dst, const Imm& src)
  {
    _emitInstruction(INST_PSRLDQ, &dst, &src);
  }

  //! @brief Packed Shift Right Logical (SSE2).
  inline void psrlw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSRLW, &dst, &src);
  }
  //! @brief Packed Shift Right Logical (SSE2).
  inline void psrlw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSRLW, &dst, &src);
  }
  //! @brief Packed Shift Right Logical (SSE2).
  inline void psrlw(const XMMVar& dst, const Imm& src)
  {
    _emitInstruction(INST_PSRLW, &dst, &src);
  }

  //! @brief Packed Subtract with Saturation (SSE2).
  inline void psubsb(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSUBSB, &dst, &src);
  }
  //! @brief Packed Subtract with Saturation (SSE2).
  inline void psubsb(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSUBSB, &dst, &src);
  }

  //! @brief Packed Subtract with Saturation (SSE2).
  inline void psubsw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSUBSW, &dst, &src);
  }
  //! @brief Packed Subtract with Saturation (SSE2).
  inline void psubsw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSUBSW, &dst, &src);
  }

  //! @brief Packed Subtract with Unsigned Saturation (SSE2).
  inline void psubusb(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSUBUSB, &dst, &src);
  }
  //! @brief Packed Subtract with Unsigned Saturation (SSE2).
  inline void psubusb(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSUBUSB, &dst, &src);
  }

  //! @brief Packed Subtract with Unsigned Saturation (SSE2).
  inline void psubusw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PSUBUSW, &dst, &src);
  }
  //! @brief Packed Subtract with Unsigned Saturation (SSE2).
  inline void psubusw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PSUBUSW, &dst, &src);
  }

  //! @brief Unpack High Data (SSE2).
  inline void punpckhbw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PUNPCKHBW, &dst, &src);
  }
  //! @brief Unpack High Data (SSE2).
  inline void punpckhbw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PUNPCKHBW, &dst, &src);
  }

  //! @brief Unpack High Data (SSE2).
  inline void punpckhwd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PUNPCKHWD, &dst, &src);
  }
  //! @brief Unpack High Data (SSE2).
  inline void punpckhwd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PUNPCKHWD, &dst, &src);
  }

  //! @brief Unpack High Data (SSE2).
  inline void punpckhdq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PUNPCKHDQ, &dst, &src);
  }
  //! @brief Unpack High Data (SSE2).
  inline void punpckhdq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PUNPCKHDQ, &dst, &src);
  }

  //! @brief Unpack High Data (SSE2).
  inline void punpckhqdq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PUNPCKHQDQ, &dst, &src);
  }
  //! @brief Unpack High Data (SSE2).
  inline void punpckhqdq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PUNPCKHQDQ, &dst, &src);
  }

  //! @brief Unpack Low Data (SSE2).
  inline void punpcklbw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PUNPCKLBW, &dst, &src);
  }
  //! @brief Unpack Low Data (SSE2).
  inline void punpcklbw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PUNPCKLBW, &dst, &src);
  }

  //! @brief Unpack Low Data (SSE2).
  inline void punpcklwd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PUNPCKLWD, &dst, &src);
  }
  //! @brief Unpack Low Data (SSE2).
  inline void punpcklwd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PUNPCKLWD, &dst, &src);
  }

  //! @brief Unpack Low Data (SSE2).
  inline void punpckldq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PUNPCKLDQ, &dst, &src);
  }
  //! @brief Unpack Low Data (SSE2).
  inline void punpckldq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PUNPCKLDQ, &dst, &src);
  }

  //! @brief Unpack Low Data (SSE2).
  inline void punpcklqdq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PUNPCKLQDQ, &dst, &src);
  }
  //! @brief Unpack Low Data (SSE2).
  inline void punpcklqdq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PUNPCKLQDQ, &dst, &src);
  }

  //! @brief Bitwise Exclusive OR (SSE2).
  inline void pxor(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PXOR, &dst, &src);
  }
  //! @brief Bitwise Exclusive OR (SSE2).
  inline void pxor(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PXOR, &dst, &src);
  }

  //! @brief Shuffle DP-FP (SSE2).
  inline void shufpd(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_SHUFPD, &dst, &src, &imm8);
  }
  //! @brief Shuffle DP-FP (SSE2).
  inline void shufpd(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_SHUFPD, &dst, &src, &imm8);
  }

  //! @brief Compute Square Roots of Packed DP-FP Values (SSE2).
  inline void sqrtpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_SQRTPD, &dst, &src);
  }
  //! @brief Compute Square Roots of Packed DP-FP Values (SSE2).
  inline void sqrtpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_SQRTPD, &dst, &src);
  }

  //! @brief Compute Square Root of Scalar DP-FP Value (SSE2).
  inline void sqrtsd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_SQRTSD, &dst, &src);
  }
  //! @brief Compute Square Root of Scalar DP-FP Value (SSE2).
  inline void sqrtsd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_SQRTSD, &dst, &src);
  }

  //! @brief Packed DP-FP Subtract (SSE2).
  inline void subpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_SUBPD, &dst, &src);
  }
  //! @brief Packed DP-FP Subtract (SSE2).
  inline void subpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_SUBPD, &dst, &src);
  }

  //! @brief Scalar DP-FP Subtract (SSE2).
  inline void subsd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_SUBSD, &dst, &src);
  }
  //! @brief Scalar DP-FP Subtract (SSE2).
  inline void subsd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_SUBSD, &dst, &src);
  }

  //! @brief Scalar Unordered DP-FP Compare and Set EFLAGS (SSE2).
  inline void ucomisd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_UCOMISD, &dst, &src);
  }
  //! @brief Scalar Unordered DP-FP Compare and Set EFLAGS (SSE2).
  inline void ucomisd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_UCOMISD, &dst, &src);
  }

  //! @brief Unpack and Interleave High Packed Double-Precision FP Values (SSE2).
  inline void unpckhpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_UNPCKHPD, &dst, &src);
  }
  //! @brief Unpack and Interleave High Packed Double-Precision FP Values (SSE2).
  inline void unpckhpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_UNPCKHPD, &dst, &src);
  }

  //! @brief Unpack and Interleave Low Packed Double-Precision FP Values (SSE2).
  inline void unpcklpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_UNPCKLPD, &dst, &src);
  }
  //! @brief Unpack and Interleave Low Packed Double-Precision FP Values (SSE2).
  inline void unpcklpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_UNPCKLPD, &dst, &src);
  }

  //! @brief Bit-wise Logical XOR for DP-FP Data (SSE2).
  inline void xorpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_XORPD, &dst, &src);
  }
  //! @brief Bit-wise Logical XOR for DP-FP Data (SSE2).
  inline void xorpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_XORPD, &dst, &src);
  }

  // --------------------------------------------------------------------------
  // [SSE3]
  // --------------------------------------------------------------------------

  //! @brief Packed DP-FP Add/Subtract (SSE3).
  inline void addsubpd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_ADDSUBPD, &dst, &src);
  }
  //! @brief Packed DP-FP Add/Subtract (SSE3).
  inline void addsubpd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_ADDSUBPD, &dst, &src);
  }

  //!
@brief Packed SP-FP Add/Subtract (SSE3). + inline void addsubps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_ADDSUBPS, &dst, &src); + } + //! @brief Packed SP-FP Add/Subtract (SSE3). + inline void addsubps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_ADDSUBPS, &dst, &src); + } + +#if ASMJIT_NOT_SUPPORTED_BY_COMPILER + // TODO: NOT IMPLEMENTED BY THE COMPILER. + //! @brief Store Integer with Truncation (SSE3). + inline void fisttp(const Mem& dst) + { + _emitInstruction(INST_FISTTP, &dst); + } +#endif // ASMJIT_NOT_SUPPORTED_BY_COMPILER + + //! @brief Packed DP-FP Horizontal Add (SSE3). + inline void haddpd(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_HADDPD, &dst, &src); + } + //! @brief Packed DP-FP Horizontal Add (SSE3). + inline void haddpd(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_HADDPD, &dst, &src); + } + + //! @brief Packed SP-FP Horizontal Add (SSE3). + inline void haddps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_HADDPS, &dst, &src); + } + //! @brief Packed SP-FP Horizontal Add (SSE3). + inline void haddps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_HADDPS, &dst, &src); + } + + //! @brief Packed DP-FP Horizontal Subtract (SSE3). + inline void hsubpd(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_HSUBPD, &dst, &src); + } + //! @brief Packed DP-FP Horizontal Subtract (SSE3). + inline void hsubpd(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_HSUBPD, &dst, &src); + } + + //! @brief Packed SP-FP Horizontal Subtract (SSE3). + inline void hsubps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_HSUBPS, &dst, &src); + } + //! @brief Packed SP-FP Horizontal Subtract (SSE3). + inline void hsubps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_HSUBPS, &dst, &src); + } + + //! @brief Load Unaligned Integer 128 Bits (SSE3). 
+ inline void lddqu(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_LDDQU, &dst, &src); + } + +#if ASMJIT_NOT_SUPPORTED_BY_COMPILER + //! @brief Set Up Monitor Address (SSE3). + inline void monitor() + { + _emitInstruction(INST_MONITOR); + } +#endif // ASMJIT_NOT_SUPPORTED_BY_COMPILER + + //! @brief Move One DP-FP and Duplicate (SSE3). + inline void movddup(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVDDUP, &dst, &src); + } + //! @brief Move One DP-FP and Duplicate (SSE3). + inline void movddup(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVDDUP, &dst, &src); + } + + //! @brief Move Packed SP-FP High and Duplicate (SSE3). + inline void movshdup(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVSHDUP, &dst, &src); + } + //! @brief Move Packed SP-FP High and Duplicate (SSE3). + inline void movshdup(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVSHDUP, &dst, &src); + } + + //! @brief Move Packed SP-FP Low and Duplicate (SSE3). + inline void movsldup(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_MOVSLDUP, &dst, &src); + } + //! @brief Move Packed SP-FP Low and Duplicate (SSE3). + inline void movsldup(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVSLDUP, &dst, &src); + } + +#if ASMJIT_NOT_SUPPORTED_BY_COMPILER + //! @brief Monitor Wait (SSE3). + inline void mwait() + { + _emitInstruction(INST_MWAIT); + } +#endif // ASMJIT_NOT_SUPPORTED_BY_COMPILER + + // -------------------------------------------------------------------------- + // [SSSE3] + // -------------------------------------------------------------------------- + + //! @brief Packed SIGN (SSSE3). + inline void psignb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSIGNB, &dst, &src); + } + //! @brief Packed SIGN (SSSE3). + inline void psignb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSIGNB, &dst, &src); + } + + //! @brief Packed SIGN (SSSE3). 
+ inline void psignb(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PSIGNB, &dst, &src); + } + //! @brief Packed SIGN (SSSE3). + inline void psignb(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSIGNB, &dst, &src); + } + + //! @brief Packed SIGN (SSSE3). + inline void psignw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSIGNW, &dst, &src); + } + //! @brief Packed SIGN (SSSE3). + inline void psignw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSIGNW, &dst, &src); + } + + //! @brief Packed SIGN (SSSE3). + inline void psignw(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PSIGNW, &dst, &src); + } + //! @brief Packed SIGN (SSSE3). + inline void psignw(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSIGNW, &dst, &src); + } + + //! @brief Packed SIGN (SSSE3). + inline void psignd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSIGND, &dst, &src); + } + //! @brief Packed SIGN (SSSE3). + inline void psignd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSIGND, &dst, &src); + } + + //! @brief Packed SIGN (SSSE3). + inline void psignd(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PSIGND, &dst, &src); + } + //! @brief Packed SIGN (SSSE3). + inline void psignd(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSIGND, &dst, &src); + } + + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PHADDW, &dst, &src); + } + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHADDW, &dst, &src); + } + + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddw(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PHADDW, &dst, &src); + } + //! @brief Packed Horizontal Add (SSSE3). 
+ inline void phaddw(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHADDW, &dst, &src); + } + + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PHADDD, &dst, &src); + } + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHADDD, &dst, &src); + } + + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddd(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PHADDD, &dst, &src); + } + //! @brief Packed Horizontal Add (SSSE3). + inline void phaddd(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHADDD, &dst, &src); + } + + //! @brief Packed Horizontal Add and Saturate (SSSE3). + inline void phaddsw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PHADDSW, &dst, &src); + } + //! @brief Packed Horizontal Add and Saturate (SSSE3). + inline void phaddsw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHADDSW, &dst, &src); + } + + //! @brief Packed Horizontal Add and Saturate (SSSE3). + inline void phaddsw(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PHADDSW, &dst, &src); + } + //! @brief Packed Horizontal Add and Saturate (SSSE3). + inline void phaddsw(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHADDSW, &dst, &src); + } + + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PHSUBW, &dst, &src); + } + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHSUBW, &dst, &src); + } + + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubw(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PHSUBW, &dst, &src); + } + //! @brief Packed Horizontal Subtract (SSSE3). 
+ inline void phsubw(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHSUBW, &dst, &src); + } + + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PHSUBD, &dst, &src); + } + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHSUBD, &dst, &src); + } + + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubd(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PHSUBD, &dst, &src); + } + //! @brief Packed Horizontal Subtract (SSSE3). + inline void phsubd(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHSUBD, &dst, &src); + } + + //! @brief Packed Horizontal Subtract and Saturate (SSSE3). + inline void phsubsw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PHSUBSW, &dst, &src); + } + //! @brief Packed Horizontal Subtract and Saturate (SSSE3). + inline void phsubsw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHSUBSW, &dst, &src); + } + + //! @brief Packed Horizontal Subtract and Saturate (SSSE3). + inline void phsubsw(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PHSUBSW, &dst, &src); + } + //! @brief Packed Horizontal Subtract and Saturate (SSSE3). + inline void phsubsw(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHSUBSW, &dst, &src); + } + + //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3). + inline void pmaddubsw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PMADDUBSW, &dst, &src); + } + //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3). + inline void pmaddubsw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMADDUBSW, &dst, &src); + } + + //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3). 
+ inline void pmaddubsw(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PMADDUBSW, &dst, &src); + } + //! @brief Multiply and Add Packed Signed and Unsigned Bytes (SSSE3). + inline void pmaddubsw(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMADDUBSW, &dst, &src); + } + + //! @brief Packed Absolute Value (SSSE3). + inline void pabsb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PABSB, &dst, &src); + } + //! @brief Packed Absolute Value (SSSE3). + inline void pabsb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PABSB, &dst, &src); + } + + //! @brief Packed Absolute Value (SSSE3). + inline void pabsb(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PABSB, &dst, &src); + } + //! @brief Packed Absolute Value (SSSE3). + inline void pabsb(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PABSB, &dst, &src); + } + + //! @brief Packed Absolute Value (SSSE3). + inline void pabsw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PABSW, &dst, &src); + } + //! @brief Packed Absolute Value (SSSE3). + inline void pabsw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PABSW, &dst, &src); + } + + //! @brief Packed Absolute Value (SSSE3). + inline void pabsw(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PABSW, &dst, &src); + } + //! @brief Packed Absolute Value (SSSE3). + inline void pabsw(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PABSW, &dst, &src); + } + + //! @brief Packed Absolute Value (SSSE3). + inline void pabsd(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PABSD, &dst, &src); + } + //! @brief Packed Absolute Value (SSSE3). + inline void pabsd(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PABSD, &dst, &src); + } + + //! @brief Packed Absolute Value (SSSE3). + inline void pabsd(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PABSD, &dst, &src); + } + //! 
@brief Packed Absolute Value (SSSE3). + inline void pabsd(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PABSD, &dst, &src); + } + + //! @brief Packed Multiply High with Round and Scale (SSSE3). + inline void pmulhrsw(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PMULHRSW, &dst, &src); + } + //! @brief Packed Multiply High with Round and Scale (SSSE3). + inline void pmulhrsw(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMULHRSW, &dst, &src); + } + + //! @brief Packed Multiply High with Round and Scale (SSSE3). + inline void pmulhrsw(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PMULHRSW, &dst, &src); + } + //! @brief Packed Multiply High with Round and Scale (SSSE3). + inline void pmulhrsw(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMULHRSW, &dst, &src); + } + + //! @brief Packed Shuffle Bytes (SSSE3). + inline void pshufb(const MMVar& dst, const MMVar& src) + { + _emitInstruction(INST_PSHUFB, &dst, &src); + } + //! @brief Packed Shuffle Bytes (SSSE3). + inline void pshufb(const MMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSHUFB, &dst, &src); + } + + //! @brief Packed Shuffle Bytes (SSSE3). + inline void pshufb(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PSHUFB, &dst, &src); + } + //! @brief Packed Shuffle Bytes (SSSE3). + inline void pshufb(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PSHUFB, &dst, &src); + } + + //! @brief Packed Shuffle Bytes (SSSE3). + inline void palignr(const MMVar& dst, const MMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PALIGNR, &dst, &src, &imm8); + } + //! @brief Packed Shuffle Bytes (SSSE3). + inline void palignr(const MMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PALIGNR, &dst, &src, &imm8); + } + + //! @brief Packed Shuffle Bytes (SSSE3). 
+ inline void palignr(const XMMVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PALIGNR, &dst, &src, &imm8); + } + //! @brief Packed Shuffle Bytes (SSSE3). + inline void palignr(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PALIGNR, &dst, &src, &imm8); + } + + // -------------------------------------------------------------------------- + // [SSE4.1] + // -------------------------------------------------------------------------- + + //! @brief Blend Packed DP-FP Values (SSE4.1). + inline void blendpd(const XMMVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_BLENDPD, &dst, &src, &imm8); + } + //! @brief Blend Packed DP-FP Values (SSE4.1). + inline void blendpd(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_BLENDPD, &dst, &src, &imm8); + } + + //! @brief Blend Packed SP-FP Values (SSE4.1). + inline void blendps(const XMMVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_BLENDPS, &dst, &src, &imm8); + } + //! @brief Blend Packed SP-FP Values (SSE4.1). + inline void blendps(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_BLENDPS, &dst, &src, &imm8); + } + + //! @brief Variable Blend Packed DP-FP Values (SSE4.1). + inline void blendvpd(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_BLENDVPD, &dst, &src); + } + //! @brief Variable Blend Packed DP-FP Values (SSE4.1). + inline void blendvpd(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_BLENDVPD, &dst, &src); + } + + //! @brief Variable Blend Packed SP-FP Values (SSE4.1). + inline void blendvps(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_BLENDVPS, &dst, &src); + } + //! @brief Variable Blend Packed SP-FP Values (SSE4.1). + inline void blendvps(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_BLENDVPS, &dst, &src); + } + + //! @brief Dot Product of Packed DP-FP Values (SSE4.1). 
+ inline void dppd(const XMMVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_DPPD, &dst, &src, &imm8); + } + //! @brief Dot Product of Packed DP-FP Values (SSE4.1). + inline void dppd(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_DPPD, &dst, &src, &imm8); + } + + //! @brief Dot Product of Packed SP-FP Values (SSE4.1). + inline void dpps(const XMMVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_DPPS, &dst, &src, &imm8); + } + //! @brief Dot Product of Packed SP-FP Values (SSE4.1). + inline void dpps(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_DPPS, &dst, &src, &imm8); + } + + //! @brief Extract Packed SP-FP Value (SSE4.1). + inline void extractps(const XMMVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_EXTRACTPS, &dst, &src, &imm8); + } + //! @brief Extract Packed SP-FP Value (SSE4.1). + inline void extractps(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_EXTRACTPS, &dst, &src, &imm8); + } + + //! @brief Load Double Quadword Non-Temporal Aligned Hint (SSE4.1). + inline void movntdqa(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_MOVNTDQA, &dst, &src); + } + + //! @brief Compute Multiple Packed Sums of Absolute Difference (SSE4.1). + inline void mpsadbw(const XMMVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_MPSADBW, &dst, &src, &imm8); + } + //! @brief Compute Multiple Packed Sums of Absolute Difference (SSE4.1). + inline void mpsadbw(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_MPSADBW, &dst, &src, &imm8); + } + + //! @brief Pack with Unsigned Saturation (SSE4.1). + inline void packusdw(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PACKUSDW, &dst, &src); + } + //! @brief Pack with Unsigned Saturation (SSE4.1). 
+ inline void packusdw(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PACKUSDW, &dst, &src); + } + + //! @brief Variable Blend Packed Bytes (SSE4.1). + inline void pblendvb(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PBLENDVB, &dst, &src); + } + //! @brief Variable Blend Packed Bytes (SSE4.1). + inline void pblendvb(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PBLENDVB, &dst, &src); + } + + //! @brief Blend Packed Words (SSE4.1). + inline void pblendw(const XMMVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PBLENDW, &dst, &src, &imm8); + } + //! @brief Blend Packed Words (SSE4.1). + inline void pblendw(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PBLENDW, &dst, &src, &imm8); + } + + //! @brief Compare Packed Qword Data for Equal (SSE4.1). + inline void pcmpeqq(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PCMPEQQ, &dst, &src); + } + //! @brief Compare Packed Qword Data for Equal (SSE4.1). + inline void pcmpeqq(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PCMPEQQ, &dst, &src); + } + + //! @brief Extract Byte (SSE4.1). + inline void pextrb(const GPVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRB, &dst, &src, &imm8); + } + //! @brief Extract Byte (SSE4.1). + inline void pextrb(const Mem& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRB, &dst, &src, &imm8); + } + + //! @brief Extract Dword (SSE4.1). + inline void pextrd(const GPVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRD, &dst, &src, &imm8); + } + //! @brief Extract Dword (SSE4.1). + inline void pextrd(const Mem& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRD, &dst, &src, &imm8); + } + + //! @brief Extract Dword (SSE4.1). 
+ inline void pextrq(const GPVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRQ, &dst, &src, &imm8); + } + //! @brief Extract Dword (SSE4.1). + inline void pextrq(const Mem& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRQ, &dst, &src, &imm8); + } + + //! @brief Extract Word (SSE4.1). + inline void pextrw(const GPVar& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRW, &dst, &src, &imm8); + } + //! @brief Extract Word (SSE4.1). + inline void pextrw(const Mem& dst, const XMMVar& src, const Imm& imm8) + { + _emitInstruction(INST_PEXTRW, &dst, &src, &imm8); + } + + //! @brief Packed Horizontal Word Minimum (SSE4.1). + inline void phminposuw(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PHMINPOSUW, &dst, &src); + } + //! @brief Packed Horizontal Word Minimum (SSE4.1). + inline void phminposuw(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PHMINPOSUW, &dst, &src); + } + + //! @brief Insert Byte (SSE4.1). + inline void pinsrb(const XMMVar& dst, const GPVar& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRB, &dst, &src, &imm8); + } + //! @brief Insert Byte (SSE4.1). + inline void pinsrb(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRB, &dst, &src, &imm8); + } + + //! @brief Insert Dword (SSE4.1). + inline void pinsrd(const XMMVar& dst, const GPVar& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRD, &dst, &src, &imm8); + } + //! @brief Insert Dword (SSE4.1). + inline void pinsrd(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRD, &dst, &src, &imm8); + } + + //! @brief Insert Dword (SSE4.1). + inline void pinsrq(const XMMVar& dst, const GPVar& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRQ, &dst, &src, &imm8); + } + //! @brief Insert Dword (SSE4.1). 
+ inline void pinsrq(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRQ, &dst, &src, &imm8); + } + + //! @brief Insert Word (SSE2). + inline void pinsrw(const XMMVar& dst, const GPVar& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRW, &dst, &src, &imm8); + } + //! @brief Insert Word (SSE2). + inline void pinsrw(const XMMVar& dst, const Mem& src, const Imm& imm8) + { + _emitInstruction(INST_PINSRW, &dst, &src, &imm8); + } + + //! @brief Maximum of Packed Word Integers (SSE4.1). + inline void pmaxuw(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PMAXUW, &dst, &src); + } + //! @brief Maximum of Packed Word Integers (SSE4.1). + inline void pmaxuw(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMAXUW, &dst, &src); + } + + //! @brief Maximum of Packed Signed Byte Integers (SSE4.1). + inline void pmaxsb(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PMAXSB, &dst, &src); + } + //! @brief Maximum of Packed Signed Byte Integers (SSE4.1). + inline void pmaxsb(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMAXSB, &dst, &src); + } + + //! @brief Maximum of Packed Signed Dword Integers (SSE4.1). + inline void pmaxsd(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PMAXSD, &dst, &src); + } + //! @brief Maximum of Packed Signed Dword Integers (SSE4.1). + inline void pmaxsd(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMAXSD, &dst, &src); + } + + //! @brief Maximum of Packed Unsigned Dword Integers (SSE4.1). + inline void pmaxud(const XMMVar& dst, const XMMVar& src) + { + _emitInstruction(INST_PMAXUD, &dst, &src); + } + //! @brief Maximum of Packed Unsigned Dword Integers (SSE4.1). + inline void pmaxud(const XMMVar& dst, const Mem& src) + { + _emitInstruction(INST_PMAXUD, &dst, &src); + } + + //! @brief Minimum of Packed Signed Byte Integers (SSE4.1). 
  inline void pminsb(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMINSB, &dst, &src);
  }
  //! @brief Minimum of Packed Signed Byte Integers (SSE4.1).
  inline void pminsb(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMINSB, &dst, &src);
  }

  //! @brief Minimum of Packed Unsigned Word Integers (SSE4.1).
  inline void pminuw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMINUW, &dst, &src);
  }
  //! @brief Minimum of Packed Unsigned Word Integers (SSE4.1).
  inline void pminuw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMINUW, &dst, &src);
  }

  //! @brief Minimum of Packed Unsigned Dword Integers (SSE4.1).
  inline void pminud(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMINUD, &dst, &src);
  }
  //! @brief Minimum of Packed Unsigned Dword Integers (SSE4.1).
  inline void pminud(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMINUD, &dst, &src);
  }

  //! @brief Minimum of Packed Signed Dword Integers (SSE4.1).
  inline void pminsd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMINSD, &dst, &src);
  }
  //! @brief Minimum of Packed Signed Dword Integers (SSE4.1).
  inline void pminsd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMINSD, &dst, &src);
  }

  //! @brief Packed Move with Sign Extend (SSE4.1).
  inline void pmovsxbw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVSXBW, &dst, &src);
  }
  //! @brief Packed Move with Sign Extend (SSE4.1).
  inline void pmovsxbw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMOVSXBW, &dst, &src);
  }

  //! @brief Packed Move with Sign Extend (SSE4.1).
  inline void pmovsxbd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVSXBD, &dst, &src);
  }
  //! @brief Packed Move with Sign Extend (SSE4.1).
  inline void pmovsxbd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMOVSXBD, &dst, &src);
  }

  //! @brief Packed Move with Sign Extend (SSE4.1).
  inline void pmovsxbq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVSXBQ, &dst, &src);
  }
  //! @brief Packed Move with Sign Extend (SSE4.1).
  inline void pmovsxbq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMOVSXBQ, &dst, &src);
  }

  //! @brief Packed Move with Sign Extend (SSE4.1).
  inline void pmovsxwd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVSXWD, &dst, &src);
  }
  //! @brief Packed Move with Sign Extend (SSE4.1).
  inline void pmovsxwd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMOVSXWD, &dst, &src);
  }

  //! @brief Packed Move with Sign Extend (SSE4.1).
  inline void pmovsxwq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVSXWQ, &dst, &src);
  }
  //! @brief Packed Move with Sign Extend (SSE4.1).
  inline void pmovsxwq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMOVSXWQ, &dst, &src);
  }

  //! @brief Packed Move with Sign Extend (SSE4.1).
  inline void pmovsxdq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVSXDQ, &dst, &src);
  }
  //! @brief Packed Move with Sign Extend (SSE4.1).
  inline void pmovsxdq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMOVSXDQ, &dst, &src);
  }

  //! @brief Packed Move with Zero Extend (SSE4.1).
  inline void pmovzxbw(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVZXBW, &dst, &src);
  }
  //! @brief Packed Move with Zero Extend (SSE4.1).
  inline void pmovzxbw(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMOVZXBW, &dst, &src);
  }

  //! @brief Packed Move with Zero Extend (SSE4.1).
  inline void pmovzxbd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVZXBD, &dst, &src);
  }
  //! @brief Packed Move with Zero Extend (SSE4.1).
  inline void pmovzxbd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMOVZXBD, &dst, &src);
  }

  //! @brief Packed Move with Zero Extend (SSE4.1).
  inline void pmovzxbq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVZXBQ, &dst, &src);
  }
  //! @brief Packed Move with Zero Extend (SSE4.1).
  inline void pmovzxbq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMOVZXBQ, &dst, &src);
  }

  //! @brief Packed Move with Zero Extend (SSE4.1).
  inline void pmovzxwd(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVZXWD, &dst, &src);
  }
  //! @brief Packed Move with Zero Extend (SSE4.1).
  inline void pmovzxwd(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMOVZXWD, &dst, &src);
  }

  //! @brief Packed Move with Zero Extend (SSE4.1).
  inline void pmovzxwq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVZXWQ, &dst, &src);
  }
  //! @brief Packed Move with Zero Extend (SSE4.1).
  inline void pmovzxwq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMOVZXWQ, &dst, &src);
  }

  //! @brief Packed Move with Zero Extend (SSE4.1).
  inline void pmovzxdq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMOVZXDQ, &dst, &src);
  }
  //! @brief Packed Move with Zero Extend (SSE4.1).
  inline void pmovzxdq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMOVZXDQ, &dst, &src);
  }

  //! @brief Multiply Packed Signed Dword Integers (SSE4.1).
  inline void pmuldq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMULDQ, &dst, &src);
  }
  //! @brief Multiply Packed Signed Dword Integers (SSE4.1).
  inline void pmuldq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMULDQ, &dst, &src);
  }

  //! @brief Multiply Packed Signed Integers and Store Low Result (SSE4.1).
  inline void pmulld(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PMULLD, &dst, &src);
  }
  //! @brief Multiply Packed Signed Integers and Store Low Result (SSE4.1).
  inline void pmulld(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PMULLD, &dst, &src);
  }

  //! @brief Logical Compare (SSE4.1).
  inline void ptest(const XMMVar& op1, const XMMVar& op2)
  {
    _emitInstruction(INST_PTEST, &op1, &op2);
  }
  //! @brief Logical Compare (SSE4.1).
  inline void ptest(const XMMVar& op1, const Mem& op2)
  {
    _emitInstruction(INST_PTEST, &op1, &op2);
  }

  //! @brief Round Packed SP-FP Values (SSE4.1).
  inline void roundps(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_ROUNDPS, &dst, &src, &imm8);
  }
  //! @brief Round Packed SP-FP Values (SSE4.1).
  inline void roundps(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_ROUNDPS, &dst, &src, &imm8);
  }

  //! @brief Round Scalar SP-FP Values (SSE4.1).
  inline void roundss(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_ROUNDSS, &dst, &src, &imm8);
  }
  //! @brief Round Scalar SP-FP Values (SSE4.1).
  inline void roundss(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_ROUNDSS, &dst, &src, &imm8);
  }

  //! @brief Round Packed DP-FP Values (SSE4.1).
  inline void roundpd(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_ROUNDPD, &dst, &src, &imm8);
  }
  //! @brief Round Packed DP-FP Values (SSE4.1).
  inline void roundpd(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_ROUNDPD, &dst, &src, &imm8);
  }

  //! @brief Round Scalar DP-FP Values (SSE4.1).
  inline void roundsd(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_ROUNDSD, &dst, &src, &imm8);
  }
  //! @brief Round Scalar DP-FP Values (SSE4.1).
  inline void roundsd(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_ROUNDSD, &dst, &src, &imm8);
  }

  // --------------------------------------------------------------------------
  // [SSE4.2]
  // --------------------------------------------------------------------------

  //! @brief Accumulate CRC32 Value (polynomial 0x11EDC6F41) (SSE4.2).
  inline void crc32(const GPVar& dst, const GPVar& src)
  {
    _emitInstruction(INST_CRC32, &dst, &src);
  }
  //! @brief Accumulate CRC32 Value (polynomial 0x11EDC6F41) (SSE4.2).
  //!
  //! @note 0x11EDC6F41 is the CRC-32C (Castagnoli) polynomial, not the
  //! CRC-32 polynomial used by zip/zlib.
  inline void crc32(const GPVar& dst, const Mem& src)
  {
    _emitInstruction(INST_CRC32, &dst, &src);
  }

  //! @brief Packed Compare Explicit Length Strings, Return Index (SSE4.2).
  inline void pcmpestri(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_PCMPESTRI, &dst, &src, &imm8);
  }
  //! @brief Packed Compare Explicit Length Strings, Return Index (SSE4.2).
  inline void pcmpestri(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_PCMPESTRI, &dst, &src, &imm8);
  }

  //! @brief Packed Compare Explicit Length Strings, Return Mask (SSE4.2).
  inline void pcmpestrm(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_PCMPESTRM, &dst, &src, &imm8);
  }
  //! @brief Packed Compare Explicit Length Strings, Return Mask (SSE4.2).
  inline void pcmpestrm(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_PCMPESTRM, &dst, &src, &imm8);
  }

  //! @brief Packed Compare Implicit Length Strings, Return Index (SSE4.2).
  inline void pcmpistri(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_PCMPISTRI, &dst, &src, &imm8);
  }
  //! @brief Packed Compare Implicit Length Strings, Return Index (SSE4.2).
  inline void pcmpistri(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_PCMPISTRI, &dst, &src, &imm8);
  }

  //! @brief Packed Compare Implicit Length Strings, Return Mask (SSE4.2).
  inline void pcmpistrm(const XMMVar& dst, const XMMVar& src, const Imm& imm8)
  {
    _emitInstruction(INST_PCMPISTRM, &dst, &src, &imm8);
  }
  //! @brief Packed Compare Implicit Length Strings, Return Mask (SSE4.2).
  inline void pcmpistrm(const XMMVar& dst, const Mem& src, const Imm& imm8)
  {
    _emitInstruction(INST_PCMPISTRM, &dst, &src, &imm8);
  }

  //! @brief Compare Packed Data for Greater Than (SSE4.2).
  inline void pcmpgtq(const XMMVar& dst, const XMMVar& src)
  {
    _emitInstruction(INST_PCMPGTQ, &dst, &src);
  }
  //! @brief Compare Packed Data for Greater Than (SSE4.2).
  inline void pcmpgtq(const XMMVar& dst, const Mem& src)
  {
    _emitInstruction(INST_PCMPGTQ, &dst, &src);
  }

  //! @brief Return the Count of Number of Bits Set to 1 (SSE4.2).
  //!
  //! NOTE(review): POPCNT is reported by its own CPUID flag on some CPUs;
  //! it is grouped under SSE4.2 here following the original AsmJit layout.
  inline void popcnt(const GPVar& dst, const GPVar& src)
  {
    _emitInstruction(INST_POPCNT, &dst, &src);
  }
  //! @brief Return the Count of Number of Bits Set to 1 (SSE4.2).
  inline void popcnt(const GPVar& dst, const Mem& src)
  {
    _emitInstruction(INST_POPCNT, &dst, &src);
  }

  // --------------------------------------------------------------------------
  // [AMD only]
  // --------------------------------------------------------------------------

  //! @brief Prefetch (3dNow - Amd).
  //!
  //! Loads the entire 64-byte aligned memory sequence containing the
  //! specified memory address into the L1 data cache. The position of
  //! the specified memory address within the 64-byte cache line is
  //! irrelevant. If a cache hit occurs, or if a memory fault is detected,
  //! no bus cycle is initiated and the instruction is treated as a NOP.
  inline void amd_prefetch(const Mem& mem)
  {
    _emitInstruction(INST_AMD_PREFETCH, &mem);
  }

  //! @brief Prefetch and set cache to modified (3dNow - Amd).
  //!
  //! The PREFETCHW instruction loads the prefetched line and sets the
  //! cache-line state to Modified, in anticipation of subsequent data
  //! writes to the line. The PREFETCH instruction, by contrast, typically
  //! sets the cache-line state to Exclusive (depending on the hardware
  //! implementation).
+ inline void amd_prefetchw(const Mem& mem) + { + _emitInstruction(INST_AMD_PREFETCHW, &mem); + } + + // -------------------------------------------------------------------------- + // [Intel only] + // -------------------------------------------------------------------------- + + //! @brief Move Data After Swapping Bytes (SSE3 - Intel Atom). + inline void movbe(const GPVar& dst, const Mem& src) + { + ASMJIT_ASSERT(!dst.isGPB()); + _emitInstruction(INST_MOVBE, &dst, &src); + } + + //! @brief Move Data After Swapping Bytes (SSE3 - Intel Atom). + inline void movbe(const Mem& dst, const GPVar& src) + { + ASMJIT_ASSERT(!src.isGPB()); + _emitInstruction(INST_MOVBE, &dst, &src); + } + + // ------------------------------------------------------------------------- + // [Emit Options] + // ------------------------------------------------------------------------- + + //! @brief Assert LOCK# Signal Prefix. + //! + //! This instruction causes the processor's LOCK# signal to be asserted + //! during execution of the accompanying instruction (turns the + //! instruction into an atomic instruction). In a multiprocessor environment, + //! the LOCK# signal insures that the processor has exclusive use of any shared + //! memory while the signal is asserted. + //! + //! The LOCK prefix can be prepended only to the following instructions and + //! to those forms of the instructions that use a memory operand: ADD, ADC, + //! AND, BTC, BTR, BTS, CMPXCHG, DEC, INC, NEG, NOT, OR, SBB, SUB, XOR, XADD, + //! and XCHG. An undefined opcode exception will be generated if the LOCK + //! prefix is used with any other instruction. The XCHG instruction always + //! asserts the LOCK# signal regardless of the presence or absence of the LOCK + //! prefix. + inline void lock() + { + _emitOptions |= EMIT_OPTION_LOCK_PREFIX; + } + + //! @brief Force REX prefix to be emitted. + //! + //! This option should be used carefully, because there are unencodable + //! combinations. 
If you want to access ah, bh, ch or dh registers then you + //! can't emit REX prefix and it will cause an illegal instruction error. + //! + //! @note REX prefix is only valid for X64/AMD64 platform. + //! + //! @sa @c EMIT_OPTION_REX_PREFIX. + inline void rex() + { + _emitOptions |= EMIT_OPTION_REX_PREFIX; + } +}; + +// ============================================================================ +// [AsmJit::Compiler] +// ============================================================================ + +//! @brief Compiler - high level code generation. +//! +//! This class is used to store instruction stream and allows to modify +//! it on the fly. It uses different concept than @c AsmJit::Assembler class +//! and in fact @c AsmJit::Assembler is only used as a backend. Compiler never +//! emits machine code and each instruction you use is stored to instruction +//! array instead. This allows to modify instruction stream later and for +//! example to reorder instructions to make better performance. +//! +//! Using @c AsmJit::Compiler moves code generation to higher level. Higher +//! level constructs allows to write more abstract and extensible code that +//! is not possible with pure @c AsmJit::Assembler class. Because +//! @c AsmJit::Compiler needs to create many objects and lifetime of these +//! objects is small (same as @c AsmJit::Compiler lifetime itself) it uses +//! very fast memory management model. This model allows to create object +//! instances in nearly zero time (compared to @c malloc() or @c new() +//! operators) so overhead by creating machine code by @c AsmJit::Compiler +//! is minimized. +//! +//! @section AsmJit_Compiler_TheStory The Story +//! +//! Before telling you how Compiler works I'd like to write a story. I'd like +//! to cover reasons why this class was created and why I'm recommending to use +//! it. When I released the first version of AsmJit (0.1) it was a toy. The +//! 
first function I wrote was function which is still available as testjit and +//! which simply returns 1024. The reason why function works for both 32-bit/ +//! 64-bit mode and for Windows/Unix specific calling conventions is luck, no +//! arguments usage and no registers usage except returning value in EAX/RAX. +//! +//! Then I started a project called BlitJit which was targetted to generating +//! JIT code for computer graphics. After writing some lines I decided that I +//! can't join pieces of code together without abstraction, should be +//! pixels source pointer in ESI/RSI or EDI/RDI or it's completelly +//! irrellevant? What about destination pointer and SSE2 register for reading +//! input pixels? The simple answer might be "just pick some one and use it". +//! +//! Another reason for abstraction is function calling-conventions. It's really +//! not easy to write assembler code for 32-bit and 64-bit platform supporting +//! three calling conventions (32-bit is similar between Windows and Unix, but +//! 64-bit calling conventions are different). +//! +//! At this time I realized that I can't write code which uses named registers, +//! I need to abstract it. In most cases you don't need specific register, you +//! need to emit instruction that does something with 'virtual' register(s), +//! memory, immediate or label. +//! +//! The first version of AsmJit with Compiler was 0.5 (or 0.6?, can't remember). +//! There was support for 32-bit and 64-bit mode, function calling conventions, +//! but when emitting instructions the developer needed to decide which +//! registers are changed, which are only read or completely overwritten. This +//! model helped a lot when generating code, especially when joining more +//! code-sections together, but there was also small possibility for mistakes. +//! Simply the first version of Compiler was great improvement over low-level +//! Assembler class, but the API design wasn't perfect. +//! +//! 
The second version of Compiler, completelly rewritten and based on +//! different goals, is part of AsmJit starting at version 1.0. This version +//! was designed after the first one and it contains serious improvements over +//! the old one. The first improvement is that you just use instructions with +//! virtual registers - called variables. When using compiler there is no way +//! to use native registers, there are variables instead. AsmJit is smarter +//! than before and it knows which register is needed only for read (r), +//! read/write (w) or overwrite (x). Supported are also instructions which +//! are using some registers in implicit way (these registers are not part of +//! instruction definition in string form). For example to use CPUID instruction +//! you must give it four variables which will be automatically allocated to +//! input/output registers (EAX, EBX, ECX, EDX). +//! +//! Another improvement is algorithm used by a register allocator. In first +//! version the registers were allocated when creating instruction stream. In +//! new version registers are allocated after calling @c Compiler::make(). This +//! means that register allocator has information about scope of all variables +//! and their usage statistics. The algorithm to allocate registers is very +//! simple and it's always called as a 'linear scan register allocator'. When +//! you get out of registers the all possible variables are scored and the worst +//! is spilled. Of course algorithm ignores the variables used for current +//! instruction. +//! +//! In addition, because registers are allocated after the code stream is +//! generated, the state switches between jumps are handled by Compiler too. +//! You don't need to worry about jumps, compiler always do this dirty work +//! for you. +//! +//! The nearly last thing I'd like to present is calling other functions from +//! the generated code. AsmJit uses a @c FunctionPrototype class to hold +//! 
the function parameters, their position in stack (or register index) and +//! function return value. This class is used internally, but it can be +//! used to create your own function calling-convention. All standard function +//! calling conventions are implemented. +//! +//! Please enjoy the new version of Compiler, it was created for writing a +//! low-level code using high-level API, leaving developer to concentrate to +//! real problems and not to solving a register puzzle. +//! +//! @section AsmJit_Compiler_CodeGeneration Code Generation +//! +//! First that is needed to know about compiler is that compiler never emits +//! machine code. It's used as a middleware between @c AsmJit::Assembler and +//! your code. There is also convenience method @c make() that allows to +//! generate machine code directly without creating @c AsmJit::Assembler +//! instance. +//! +//! Comparison of generating machine code through @c Assembler and directly +//! by @c Compiler: +//! +//! @code +//! // Assembler instance is low level code generation class that emits +//! // machine code. +//! Assembler a; +//! +//! // Compiler instance is high level code generation class that stores all +//! // instructions in internal representation. +//! Compiler c; +//! +//! // ... put your code here ... +//! +//! // Final step - generate code. AsmJit::Compiler::serialize() will serialize +//! // all instructions into Assembler and this ensures generating real machine +//! // code. +//! c.serialize(a); +//! +//! // Your function +//! void* fn = a.make(); +//! @endcode +//! +//! Example how to generate machine code using only @c Compiler (preferred): +//! +//! @code +//! // Compiler instance is enough. +//! Compiler c; +//! +//! // ... put your code here ... +//! +//! // Your function +//! void* fn = c.make(); +//! @endcode +//! +//! You can see that there is @c AsmJit::Compiler::serialize() function that +//! emits instructions into @c AsmJit::Assembler(). This layered architecture +//! 
means that each class is used for something different and there is no code +//! duplication. For convenience there is also @c AsmJit::Compiler::make() +//! method that can create your function using @c AsmJit::Assembler, but +//! internally (this is preffered bahavior when using @c AsmJit::Compiler). +//! +//! The @c make() method allocates memory using @c CodeGenerator instance passed +//! into the @c Compiler constructor. If code generator is used to create JIT +//! function then virtual memory allocated by @c MemoryManager is used. To get +//! global memory manager use @c MemoryManager::getGlobal(). +//! +//! @code +//! // Compiler instance is enough. +//! Compiler c; +//! +//! // ... put your code using Compiler instance ... +//! +//! // Your function +//! void* fn = c.make(); +//! +//! // Free it if you don't want it anymore +//! // (using global memory manager instance) +//! MemoryManager::getGlobal()->free(fn); +//! @endcode +//! +//! @section AsmJit_Compiler_Functions Functions +//! +//! To build functions with @c Compiler, see @c AsmJit::Compiler::newFunction() +//! method. +//! +//! @section AsmJit_Compiler_Variables Variables +//! +//! Compiler is able to manage variables and function arguments. Internally +//! there is no difference between function argument and variable declared +//! inside. To get function argument you use @c argGP() method and to declare +//! variable use @c newGP(), @c newMM() and @c newXMM() methods. The @c newXXX() +//! methods accept also parameter describing the variable type. For example +//! the @c newGP() method always creates variable which size matches the target +//! architecture size (for 32-bit target the 32-bit variable is created, for +//! 64-bit target the variable size is 64-bit). To override this behavior the +//! variable type must be specified. +//! +//! @code +//! // Compiler and function declaration - void f(int*); +//! Compiler c; +//! c.newFunction(CALL_CONV_DEFAULT, BuildFunction1<int*>()); +//! +//! 
// Get argument variable (it's pointer). +//! GPVar a1(c.argGP(0)); +//! +//! // Create your variables. +//! GPVar x1(c.newGP(VARIABLE_TYPE_GPD)); +//! GPVar x2(c.newGP(VARIABLE_TYPE_GPD)); +//! +//! // Init your variables. +//! c.mov(x1, 1); +//! c.mov(x2, 2); +//! +//! // ... your code ... +//! c.add(x1, x2); +//! // ... your code ... +//! +//! // Store result to a given pointer in first argument +//! c.mov(dword_ptr(a1), x1); +//! +//! // End of function body. +//! c.endFunction(); +//! +//! // Make the function. +//! typedef void (*MyFn)(int*); +//! MyFn fn = function_cast<MyFn>(c.make()); +//! @endcode +//! +//! This code snipped needs to be explained. You can see that there are more +//! variable types that can be used by @c Compiler. Most useful variables can +//! be allocated using general purpose registers (@c GPVar), MMX registers +//! (@c MMVar) or SSE registers (@c XMMVar). +//! +//! X86/X64 variable types: +//! +//! - @c VARIABLE_TYPE_GPD - 32-bit general purpose register (EAX, EBX, ...). +//! - @c VARIABLE_TYPE_GPQ - 64-bit general purpose register (RAX, RBX, ...). +//! - @c VARIABLE_TYPE_GPN - 32-bit or 64-bit general purpose register, depends +//! to target architecture. Mapped to @c VARIABLE_TYPE_GPD or @c VARIABLE_TYPE_GPQ. +//! +//! - @c VARIABLE_TYPE_X87 - 80-bit floating point stack register st(0 to 7). +//! - @c VARIABLE_TYPE_X87_1F - 32-bit floating point stack register st(0 to 7). +//! - @c VARIABLE_TYPE_X87_1D - 64-bit floating point stack register st(0 to 7). +//! +//! - @c VARIALBE_TYPE_MM - 64-bit MMX register. +//! +//! - @c VARIABLE_TYPE_XMM - 128-bit SSE register. +//! - @c VARIABLE_TYPE_XMM_1F - 128-bit SSE register which contains +//! scalar 32-bit single precision floating point. +//! - @c VARIABLE_TYPE_XMM_1D - 128-bit SSE register which contains +//! scalar 64-bit double precision floating point. +//! - @c VARIABLE_TYPE_XMM_4F - 128-bit SSE register which contains +//! 4 packed 32-bit single precision floating points. +//! 
- @c VARIABLE_TYPE_XMM_2D - 128-bit SSE register which contains +//! 2 packed 64-bit double precision floating points. +//! +//! Unified variable types: +//! +//! - @c VARIABLE_TYPE_INT32 - 32-bit general purpose register. +//! - @c VARIABLE_TYPE_INT64 - 64-bit general purpose register. +//! - @c VARIABLE_TYPE_INTPTR - 32-bit or 64-bit general purpose register / pointer. +//! +//! - @c VARIABLE_TYPE_FLOAT - 32-bit single precision floating point. +//! - @c VARIABLE_TYPE_DOUBLE - 64-bit double precision floating point. +//! +//! Variable states: +//! +//! - @c VARIABLE_STATE_UNUSED - State that is assigned to newly created +//! variables or to not used variables (dereferenced to zero). +//! - @c VARIABLE_STATE_REGISTER - State that means that variable is currently +//! allocated in register. +//! - @c VARIABLE_STATE_MEMORY - State that means that variable is currently +//! only in memory location. +//! +//! When you create new variable, initial state is always @c VARIABLE_STATE_UNUSED, +//! allocating it to register or spilling to memory changes this state to +//! @c VARIABLE_STATE_REGISTER or @c VARIABLE_STATE_MEMORY, respectively. +//! During variable lifetime it's usual that its state is changed multiple +//! times. To generate better code, you can control allocating and spilling +//! by using up to four types of methods that allows it (see next list). +//! +//! Explicit variable allocating / spilling methods: +//! +//! - @c Compiler::alloc() - Explicit method to alloc variable into +//! register. You can use this before loops or code blocks. +//! +//! - @c Compiler::spill() - Explicit method to spill variable. If variable +//! is in register and you call this method, it's moved to its home memory +//! location. If variable is not in register no operation is performed. +//! +//! - @c Compiler::unuse() - Unuse variable (you can use this to end the +//! variable scope or sub-scope). +//! +//! 
Please see AsmJit tutorials (testcompiler.cpp and testvariables.cpp) for +//! more complete examples. +//! +//! @section AsmJit_Compiler_MemoryManagement Memory Management +//! +//! @c Compiler Memory management follows these rules: +//! - Everything created by @c Compiler is always freed by @c Compiler. +//! - To get decent performance, compiler always uses larger memory buffer +//! for objects to allocate and when compiler instance is destroyed, this +//! buffer is freed. Destructors of active objects are called when +//! destroying compiler instance. Destructors of abadonded compiler +//! objects are called immediately after abadonding them. +//! - This type of memory management is called 'zone memory management'. +//! +//! This means that you can't use any @c Compiler object after destructing it, +//! it also means that each object like @c Label, @c Var and others are created +//! and managed by @c Compiler itself. These objects contain ID which is used +//! internally by Compiler to store additional information about these objects. +//! +//! @section AsmJit_Compiler_StateManagement Control-Flow and State Management. +//! +//! The @c Compiler automatically manages state of the variables when using +//! control flow instructions like jumps, conditional jumps and calls. There +//! is minimal heuristics for choosing the method how state is saved or restored. +//! +//! Generally the state can be changed only when using jump or conditional jump +//! instruction. When using non-conditional jump then state change is embedded +//! into the instruction stream before the jump. When using conditional jump +//! the @c Compiler decides whether to restore state before the jump or whether +//! to use another block where state is restored. The last case is that no-code +//! have to be emitted and there is no state change (this is of course ideal). +//! +//! Choosing whether to embed 'restore-state' section before conditional jump +//! is quite simple. 
If jump is likely to be 'taken' then code is embedded, if +//! jump is unlikely to be taken then the small code section for state-switch +//! will be generated instead. +//! +//! Next example is the situation where the extended code block is used to +//! do state-change: +//! +//! @code +//! Compiler c; +//! +//! c.newFunction(CALL_CONV_DEFAULT, FunctionBuilder0<Void>()); +//! c.getFunction()->setHint(FUNCTION_HINT_NAKED, true); +//! +//! // Labels. +//! Label L0 = c.newLabel(); +//! +//! // Variables. +//! GPVar var0 = c.newGP(); +//! GPVar var1 = c.newGP(); +//! +//! // Cleanup. After these two lines, the var0 and var1 will be always stored +//! // in registers. Our example is very small, but in larger code the var0 can +//! // be spilled by xor(var1, var1). +//! c.xor_(var0, var0); +//! c.xor_(var1, var1); +//! c.cmp(var0, var1); +//! // State: +//! // var0 - register. +//! // var1 - register. +//! +//! // We manually spill these variables. +//! c.spill(var0); +//! c.spill(var1); +//! // State: +//! // var0 - memory. +//! // var1 - memory. +//! +//! // Conditional jump to L0. It will be always taken, but compiler thinks that +//! // it is unlikely taken so it will embed state change code somewhere. +//! c.je(L0); +//! +//! // Do something. The variables var0 and var1 will be allocated again. +//! c.add(var0, 1); +//! c.add(var1, 2); +//! // State: +//! // var0 - register. +//! // var1 - register. +//! +//! // Bind label here, the state is not changed. +//! c.bind(L0); +//! // State: +//! // var0 - register. +//! // var1 - register. +//! +//! // We need to use var0 and var1, because if compiler detects that variables +//! // are out of scope then it optimizes the state-change. +//! c.sub(var0, var1); +//! // State: +//! // var0 - register. +//! // var1 - register. +//! +//! c.endFunction(); +//! @endcode +//! +//! The output: +//! +//! @verbatim +//! xor eax, eax ; xor var_0, var_0 +//! xor ecx, ecx ; xor var_1, var_1 +//! cmp eax, ecx ; cmp var_0, var_1 +//! 
mov [esp - 24], eax ; spill var_0 +//! mov [esp - 28], ecx ; spill var_1 +//! je L0_Switch +//! mov eax, [esp - 24] ; alloc var_0 +//! add eax, 1 ; add var_0, 1 +//! mov ecx, [esp - 28] ; alloc var_1 +//! add ecx, 2 ; add var_1, 2 +//! L0: +//! sub eax, ecx ; sub var_0, var_1 +//! ret +//! +//! ; state-switch begin +//! L0_Switch0: +//! mov eax, [esp - 24] ; alloc var_0 +//! mov ecx, [esp - 28] ; alloc var_1 +//! jmp short L0 +//! ; state-switch end +//! @endverbatim +//! +//! You can see that the state-switch section was generated (see L0_Switch0). +//! The compiler is unable to restore state immediately when emitting the +//! forward jump (the code is generated from first to last instruction and +//! the target state is simply not known at this time). +//! +//! To tell @c Compiler that you want to embed state-switch code before jump +//! it's needed to create backward jump (where also processor expects that it +//! will be taken). To demonstrate the possibility to embed state-switch before +//! jump we use slightly modified code: +//! +//! @code +//! Compiler c; +//! +//! c.newFunction(CALL_CONV_DEFAULT, FunctionBuilder0<Void>()); +//! c.getFunction()->setHint(FUNCTION_HINT_NAKED, true); +//! +//! // Labels. +//! Label L0 = c.newLabel(); +//! +//! // Variables. +//! GPVar var0 = c.newGP(); +//! GPVar var1 = c.newGP(); +//! +//! // Cleanup. After these two lines, the var0 and var1 will be always stored +//! // in registers. Our example is very small, but in larger code the var0 can +//! // be spilled by xor(var1, var1). +//! c.xor_(var0, var0); +//! c.xor_(var1, var1); +//! // State: +//! // var0 - register. +//! // var1 - register. +//! +//! // We manually spill these variables. +//! c.spill(var0); +//! c.spill(var1); +//! // State: +//! // var0 - memory. +//! // var1 - memory. +//! +//! // Bind our label here. +//! c.bind(L0); +//! +//! // Do something, the variables will be allocated again. +//! c.add(var0, 1); +//! c.add(var1, 2); +//! // State: +//! 
// var0 - register.
+//! // var1 - register.
+//!
+//! // Backward conditional jump to L0. The default behavior is that it is taken
+//! // so state-change code will be embedded here.
+//! c.je(L0);
+//!
+//! c.endFunction();
+//! @endcode
+//!
+//! The output:
+//!
+//! @verbatim
+//! xor ecx, ecx ; xor var_0, var_0
+//! xor edx, edx ; xor var_1, var_1
+//! mov [esp - 24], ecx ; spill var_0
+//! mov [esp - 28], edx ; spill var_1
+//! L.2:
+//! mov ecx, [esp - 24] ; alloc var_0
+//! add ecx, 1 ; add var_0, 1
+//! mov edx, [esp - 28] ; alloc var_1
+//! add edx, 2 ; add var_1, 2
+//!
+//! ; state-switch begin
+//! mov [esp - 24], ecx ; spill var_0
+//! mov [esp - 28], edx ; spill var_1
+//! ; state-switch end
+//!
+//! je short L.2
+//! ret
+//! @endverbatim
+//!
+//! Please notice where the state-switch section is located. The @c Compiler
+//! decided that jump is likely to be taken so the state change is embedded
+//! before the conditional jump. To change this behavior into the previous
+//! case it's needed to add a hint (@c HINT_TAKEN or @c HINT_NOT_TAKEN).
+//!
+//! Replacing the <code>c.je(L0)</code> by <code>c.je(L0, HINT_NOT_TAKEN)</code>
+//! will generate code like this:
+//!
+//! @verbatim
+//! xor ecx, ecx ; xor var_0, var_0
+//! xor edx, edx ; xor var_1, var_1
+//! mov [esp - 24], ecx ; spill var_0
+//! mov [esp - 28], edx ; spill var_1
+//! L0:
+//! mov ecx, [esp - 24] ; alloc var_0
+//! add ecx, 1 ; add var_0, 1
+//! mov edx, [esp - 28] ; alloc var_1
+//! add edx, 2 ; add var_1, 2
+//! je L0_Switch, 2
+//! ret
+//!
+//! ; state-switch begin
+//! L0_Switch:
+//! mov [esp - 24], ecx ; spill var_0
+//! mov [esp - 28], edx ; spill var_1
+//! jmp short L0
+//! ; state-switch end
+//! @endverbatim
+//!
+//! This section provided information about how state-change works. The
+//! behavior is deterministic and it can be overridden.
+//!
+//! @section AsmJit_Compiler_AdvancedCodeGeneration Advanced Code Generation
+//!
+//! 
This section describes advanced method of code generation available to +//! @c Compiler (but also to @c Assembler). When emitting code to instruction +//! stream the methods like @c mov(), @c add(), @c sub() can be called directly +//! (advantage is static-type control performed also by C++ compiler) or +//! indirectly using @c emit() method. The @c emit() method needs only +//! instruction code and operands. +//! +//! Example of code generating by standard type-safe API: +//! +//! @code +//! Compiler c; +//! GPVar var0 = c.newGP(); +//! GPVar var1 = c.newGP(); +//! +//! ... +//! +//! c.mov(var0, imm(0)); +//! c.add(var0, var1); +//! c.sub(var0, var1); +//! @endcode +//! +//! The code above can be rewritten as: +//! +//! @code +//! Compiler c; +//! GPVar var0 = c.newGP(); +//! GPVar var1 = c.newGP(); +//! +//! ... +//! +//! c.emit(INST_MOV, var0, imm(0)); +//! c.emit(INST_ADD, var0, var1); +//! c.emit(INST_SUB, var0, var1); +//! @endcode +//! +//! The advantage of first snippet is very friendly API and type-safe control +//! that is controlled by the C++ compiler. The advantage of second snippet is +//! availability to replace or generate instruction code in different places. +//! See the next example how the @c emit() method can be used to generate +//! abstract code. +//! +//! Use case: +//! +//! @code +//! bool emitArithmetic(Compiler& c, XMMVar& var0, XMMVar& var1, const char* op) +//! { +//! uint code = INVALID_VALUE; +//! +//! if (strcmp(op, "ADD") == 0) +//! code = INST_ADDSS; +//! else if (strcmp(op, "SUBTRACT") == 0) +//! code = INST_SUBSS; +//! else if (strcmp(op, "MULTIPLY") == 0) +//! code = INST_MULSS; +//! else if (strcmp(op, "DIVIDE") == 0) +//! code = INST_DIVSS; +//! else +//! // Invalid parameter? +//! return false; +//! +//! c.emit(code, var0, var1); +//! } +//! @endcode +//! +//! Other use cases are waiting for you! Be sure that instruction you are +//! emitting is correct and encodable, because if not, Assembler will set +//! 
error code to @c ERROR_UNKNOWN_INSTRUCTION.
+//!
+//! @section AsmJit_Compiler_CompilerDetails Compiler Details
+//!
+//! This section is here for people interested in the compiling process. There
+//! are a few steps that must be done for each compiled function (or your code).
+//!
+//! When your @c Compiler instance is ready, you can create a function and add
+//! emittables using intrinsics or higher level methods implemented by the
+//! @c AsmJit::Compiler. When you are done (all instructions serialized) you
+//! should call @c AsmJit::Compiler::make() method which will analyze your code,
+//! allocate registers and memory for local variables and serialize all emittables
+//! to @c AsmJit::Assembler instance. The next steps show what's done internally
+//! before code is serialized into @c AsmJit::Assembler
+//! (implemented in @c AsmJit::Compiler::serialize() method).
+//!
+//! 1. Compiler tries to match function and end-function emittables (these
+//! emittables define the function-body or function block).
+//!
+//! 2. For all emittables inside the function-body the virtual functions
+//! are called in this order:
+//! - Emittable::prepare()
+//! - Emittable::translate()
+//! - Emittable::emit()
+//! - Emittable::post()
+//!
+//! There is some extra work when emitting function prolog / epilog and
+//! register allocator.
+//!
+//! 3. Emit jump tables data.
+//!
+//! When everything here ends, @c AsmJit::Assembler contains binary stream
+//! that needs only relocation to be callable by C/C++ code.
+//!
+//! @section AsmJit_Compiler_Differences Summary of Differences between @c Assembler and @c Compiler
+//!
+//! - Instructions are not translated to machine code immediately, they are
+//! stored as emittables (see @c AsmJit::Emittable, @c AsmJit::EInstruction).
+//! - Contains function builder and ability to call other functions.
+//! - Contains register allocator and variable management.
+//! - Contains a lot of helper methods to simplify the code generation not
+//! 
available/possible in @c AsmJit::Assembler. +//! - Ability to pre-process or post-process the code which is being generated. +struct ASMJIT_API Compiler : public CompilerIntrinsics +{ + //! @brief Create the @c Compiler instance. + Compiler(CodeGenerator* codeGenerator = NULL) ASMJIT_NOTHROW; + //! @brief Destroy the @c Compiler instance. + virtual ~Compiler() ASMJIT_NOTHROW; +}; + +//! @} + +} // AsmJit namespace + +#undef ASMJIT_NOT_SUPPORTED_BY_COMPILER + +// [Api-End] +#include "ApiEnd.h" + +// [Guard] +#endif // _ASMJIT_COMPILERX86X64_H diff --git a/lib/AsmJit/Config.h b/lib/AsmJit/Config.h new file mode 100644 index 0000000..0577e3a --- /dev/null +++ b/lib/AsmJit/Config.h @@ -0,0 +1,93 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// This file is designed to be changeable. 
Platform specific changes
+// should be applied to this file; this guarantees that newer versions
+// of the AsmJit library will never overwrite generated config files.
+//
+// So modify this file by your build system or by hand.
+
+// [Guard]
+#ifndef _ASMJIT_CONFIG_H
+#define _ASMJIT_CONFIG_H
+
+// ============================================================================
+// [AsmJit - OS]
+// ============================================================================
+
+// Provides definitions about your operating system. It's detected by default,
+// so override it if you have problems with automatic detection.
+//
+// #define ASMJIT_WINDOWS 1
+// #define ASMJIT_POSIX 2
+
+// ============================================================================
+// [AsmJit - Architecture]
+// ============================================================================
+
+// Provides definitions about your cpu architecture. It's detected by default,
+// so override it if you have problems with automatic detection.
+
+// #define ASMJIT_X86
+// #define ASMJIT_X64
+
+// ============================================================================
+// [AsmJit - API]
+// ============================================================================
+
+// If you are embedding AsmJit library into your project (statically), undef
+// ASMJIT_API macro. ASMJIT_HIDDEN macro can contain visibility (used by GCC)
+// to hide some AsmJit symbols that should never be exported.
+//
+// If you have problems with throw() in compilation time, undef ASMJIT_NOTHROW
+// to disable this feature. ASMJIT_NOTHROW marks functions that never throw
+// an exception. 
+ +// #define ASMJIT_HIDDEN +// #define ASMJIT_API +// #define ASMJIT_NOTHROW + + +// ============================================================================ +// [AsmJit - Memory Management] +// ============================================================================ + +// #define ASMJIT_MALLOC ::malloc +// #define ASMJIT_REALLOC ::realloc +// #define ASMJIT_FREE ::free + +// ============================================================================ +// [AsmJit - Debug] +// ============================================================================ + +// Turn debug on/off (to bypass autodetection) +// #define ASMJIT_DEBUG +// #define ASMJIT_NO_DEBUG + +// Setup custom assertion code. +// #define ASMJIT_ASSERT(exp) do { if (!(exp)) ::AsmJit::assertionFailure(__FILE__, __LINE__, #exp); } while(0) + +// [Guard] +#endif // _ASMJIT_CONFIG_H diff --git a/lib/AsmJit/CpuInfo.cpp b/lib/AsmJit/CpuInfo.cpp new file mode 100644 index 0000000..227b912 --- /dev/null +++ b/lib/AsmJit/CpuInfo.cpp @@ -0,0 +1,356 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Dependencies] +#include "CpuInfo.h" + +#if defined(ASMJIT_WINDOWS) +# include <windows.h> +#endif // ASMJIT_WINDOWS + +// 2009-02-05: Thanks to Mike Tajmajer for supporting VC7.1 compiler. This +// shouldn't affect x64 compilation, because x64 compiler starts with +// VS2005 (VC8.0). +#if defined(_MSC_VER) +# if _MSC_VER >= 1400 +# include <intrin.h> +# endif // _MSC_VER >= 1400 (>= VS2005) +#endif // _MSC_VER + +#if defined(ASMJIT_POSIX) +#include <errno.h> +#include <string.h> +#include <sys/statvfs.h> +#include <sys/utsname.h> +#include <unistd.h> +#endif // ASMJIT_POSIX + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +// helpers +static uint32_t detectNumberOfProcessors(void) +{ +#if defined(ASMJIT_WINDOWS) + SYSTEM_INFO info; + GetSystemInfo(&info); + return info.dwNumberOfProcessors; +#elif defined(ASMJIT_POSIX) && defined(_SC_NPROCESSORS_ONLN) + // It seems that sysconf returns the number of "logical" processors on both + // mac and linux. So we get the number of "online logical" processors. + long res = sysconf(_SC_NPROCESSORS_ONLN); + if (res == -1) return 1; + + return static_cast<uint32_t>(res); +#else + return 1; +#endif +} + +// This is messy, I know. cpuid is implemented as intrinsic in VS2005, but +// we should support other compilers as well. Main problem is that MS compilers +// in 64-bit mode not allows to use inline assembler, so we need intrinsic and +// we need also asm version. + +// cpuid() and detectCpuInfo() for x86 and x64 platforms begins here. +#if defined(ASMJIT_X86) || defined(ASMJIT_X64) +void cpuid(uint32_t in, CpuId* out) ASMJIT_NOTHROW +{ +#if defined(_MSC_VER) + +// 2009-02-05: Thanks to Mike Tajmajer for supporting VC7.1 compiler. 
+// ASMJIT_X64 is here only for readibility, only VS2005 can compile 64-bit code. +# if _MSC_VER >= 1400 || defined(ASMJIT_X64) + // done by intrinsics + __cpuid(reinterpret_cast<int*>(out->i), in); +# else // _MSC_VER < 1400 + uint32_t cpuid_in = in; + uint32_t* cpuid_out = out->i; + + __asm + { + mov eax, cpuid_in + mov edi, cpuid_out + cpuid + mov dword ptr[edi + 0], eax + mov dword ptr[edi + 4], ebx + mov dword ptr[edi + 8], ecx + mov dword ptr[edi + 12], edx + } +# endif // _MSC_VER < 1400 + +#elif defined(__GNUC__) + +// Note, need to preserve ebx/rbx register! +# if defined(ASMJIT_X86) +# define __mycpuid(a, b, c, d, inp) \ + asm ("mov %%ebx, %%edi\n" \ + "cpuid\n" \ + "xchg %%edi, %%ebx\n" \ + : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp)) +# else +# define __mycpuid(a, b, c, d, inp) \ + asm ("mov %%rbx, %%rdi\n" \ + "cpuid\n" \ + "xchg %%rdi, %%rbx\n" \ + : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp)) +# endif + __mycpuid(out->eax, out->ebx, out->ecx, out->edx, in); + +#endif // compiler +} + +struct CpuVendorInfo +{ + uint32_t id; + char text[12]; +}; + +static const CpuVendorInfo cpuVendorInfo[] = +{ + { CPU_VENDOR_INTEL , { 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' } }, + + { CPU_VENDOR_AMD , { 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' } }, + { CPU_VENDOR_AMD , { 'A', 'M', 'D', 'i', 's', 'b', 'e', 't', 't', 'e', 'r', '!' 
} }, + + { CPU_VENDOR_NSM , { 'G', 'e', 'o', 'd', 'e', ' ', 'b', 'y', ' ', 'N', 'S', 'C' } }, + { CPU_VENDOR_NSM , { 'C', 'y', 'r', 'i', 'x', 'I', 'n', 's', 't', 'e', 'a', 'd' } }, + + { CPU_VENDOR_TRANSMETA, { 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'T', 'M', 'x', '8', '6' } }, + { CPU_VENDOR_TRANSMETA, { 'T', 'r', 'a', 'n', 's', 'm', 'e', 't', 'a', 'C', 'P', 'U' } }, + + { CPU_VENDOR_VIA , { 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 } }, + { CPU_VENDOR_VIA , { 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' } } +}; + +static inline bool cpuVencorEq(const CpuVendorInfo& info, const char* vendorString) +{ + const uint32_t* a = reinterpret_cast<const uint32_t*>(info.text); + const uint32_t* b = reinterpret_cast<const uint32_t*>(vendorString); + + return (a[0] == b[0]) & + (a[1] == b[1]) & + (a[2] == b[2]) ; +} + +static inline void simplifyBrandString(char* s) +{ + // Always clear the current character in the buffer. This ensures that there + // is no garbage after the string NULL terminator. 
+ char* d = s; + + char prev = 0; + char curr = s[0]; + s[0] = '\0'; + + for (;;) + { + if (curr == 0) break; + + if (curr == ' ') + { + if (prev == '@') goto _Skip; + if (s[1] == ' ' || s[1] == '@') goto _Skip; + } + + d[0] = curr; + d++; + prev = curr; + +_Skip: + curr = *++s; + s[0] = '\0'; + } + + d[0] = '\0'; +} + +void detectCpuInfo(CpuInfo* i) ASMJIT_NOTHROW +{ + uint32_t a; + + // First clear our struct + memset(i, 0, sizeof(CpuInfo)); + memcpy(i->vendor, "Unknown", 8); + + i->numberOfProcessors = detectNumberOfProcessors(); + +#if defined(ASMJIT_X86) || defined(ASMJIT_X64) + CpuId out; + + // Get vendor string + cpuid(0, &out); + + memcpy(i->vendor, &out.ebx, 4); + memcpy(i->vendor + 4, &out.edx, 4); + memcpy(i->vendor + 8, &out.ecx, 4); + + for (a = 0; a < 3; a++) + { + if (cpuVencorEq(cpuVendorInfo[a], i->vendor)) + { + i->vendorId = cpuVendorInfo[a].id; + break; + } + } + + // get feature flags in ecx/edx, and family/model in eax + cpuid(1, &out); + + // family and model fields + i->family = (out.eax >> 8) & 0x0F; + i->model = (out.eax >> 4) & 0x0F; + i->stepping = (out.eax ) & 0x0F; + + // use extended family and model fields + if (i->family == 0x0F) + { + i->family += ((out.eax >> 20) & 0xFF); + i->model += ((out.eax >> 16) & 0x0F) << 4; + } + + i->x86ExtendedInfo.processorType = ((out.eax >> 12) & 0x03); + i->x86ExtendedInfo.brandIndex = ((out.ebx ) & 0xFF); + i->x86ExtendedInfo.flushCacheLineSize = ((out.ebx >> 8) & 0xFF) * 8; + i->x86ExtendedInfo.maxLogicalProcessors = ((out.ebx >> 16) & 0xFF); + i->x86ExtendedInfo.apicPhysicalId = ((out.ebx >> 24) & 0xFF); + + if (out.ecx & 0x00000001U) i->features |= CPU_FEATURE_SSE3; + if (out.ecx & 0x00000002U) i->features |= CPU_FEATURE_PCLMULDQ; + if (out.ecx & 0x00000008U) i->features |= CPU_FEATURE_MONITOR_MWAIT; + if (out.ecx & 0x00000200U) i->features |= CPU_FEATURE_SSSE3; + if (out.ecx & 0x00002000U) i->features |= CPU_FEATURE_CMPXCHG16B; + if (out.ecx & 0x00080000U) i->features |= CPU_FEATURE_SSE4_1; + 
if (out.ecx & 0x00100000U) i->features |= CPU_FEATURE_SSE4_2; + if (out.ecx & 0x00400000U) i->features |= CPU_FEATURE_MOVBE; + if (out.ecx & 0x00800000U) i->features |= CPU_FEATURE_POPCNT; + if (out.ecx & 0x10000000U) i->features |= CPU_FEATURE_AVX; + + if (out.edx & 0x00000010U) i->features |= CPU_FEATURE_RDTSC; + if (out.edx & 0x00000100U) i->features |= CPU_FEATURE_CMPXCHG8B; + if (out.edx & 0x00008000U) i->features |= CPU_FEATURE_CMOV; + if (out.edx & 0x00800000U) i->features |= CPU_FEATURE_MMX; + if (out.edx & 0x01000000U) i->features |= CPU_FEATURE_FXSR; + if (out.edx & 0x02000000U) i->features |= CPU_FEATURE_SSE | CPU_FEATURE_MMX_EXT; + if (out.edx & 0x04000000U) i->features |= CPU_FEATURE_SSE | CPU_FEATURE_SSE2; + if (out.edx & 0x10000000U) i->features |= CPU_FEATURE_MULTI_THREADING; + + if (i->vendorId == CPU_VENDOR_AMD && (out.edx & 0x10000000U)) + { + // AMD sets Multithreading to ON if it has more cores. + if (i->numberOfProcessors == 1) i->numberOfProcessors = 2; + } + + // This comment comes from V8 and I think that its important: + // + // Opteron Rev E has a bug in which on very rare occasions a locked + // instruction doesn't act as a read-acquire barrier if followed by a + // non-locked read-modify-write instruction. Rev F has this bug in + // pre-release versions, but not in versions released to customers, + // so we test only for Rev E, which is family 15, model 32..63 inclusive. + + if (i->vendorId == CPU_VENDOR_AMD && i->family == 15 && i->model >= 32 && i->model <= 63) + { + i->bugs |= CPU_BUG_AMD_LOCK_MB; + } + + // Calling cpuid with 0x80000000 as the in argument + // gets the number of valid extended IDs. 
+ + cpuid(0x80000000, &out); + + uint32_t exIds = out.eax; + if (exIds > 0x80000004) exIds = 0x80000004; + + uint32_t* brand = reinterpret_cast<uint32_t*>(i->brand); + + for (a = 0x80000001; a <= exIds; a++) + { + cpuid(a, &out); + + switch (a) + { + case 0x80000001: + if (out.ecx & 0x00000001U) i->features |= CPU_FEATURE_LAHF_SAHF; + if (out.ecx & 0x00000020U) i->features |= CPU_FEATURE_LZCNT; + if (out.ecx & 0x00000040U) i->features |= CPU_FEATURE_SSE4_A; + if (out.ecx & 0x00000080U) i->features |= CPU_FEATURE_MSSE; + if (out.ecx & 0x00000100U) i->features |= CPU_FEATURE_PREFETCH; + + if (out.edx & 0x00100000U) i->features |= CPU_FEATURE_EXECUTE_DISABLE_BIT; + if (out.edx & 0x00200000U) i->features |= CPU_FEATURE_FFXSR; + if (out.edx & 0x00400000U) i->features |= CPU_FEATURE_MMX_EXT; + if (out.edx & 0x08000000U) i->features |= CPU_FEATURE_RDTSCP; + if (out.edx & 0x20000000U) i->features |= CPU_FEATURE_64_BIT; + if (out.edx & 0x40000000U) i->features |= CPU_FEATURE_3DNOW_EXT | CPU_FEATURE_MMX_EXT; + if (out.edx & 0x80000000U) i->features |= CPU_FEATURE_3DNOW; + break; + + case 0x80000002: + case 0x80000003: + case 0x80000004: + *brand++ = out.eax; + *brand++ = out.ebx; + *brand++ = out.ecx; + *brand++ = out.edx; + break; + + default: + // Additional features can be detected in the future. + break; + } + } + + // Simplify the brand string (remove unnecessary spaces to make it printable). 
+ simplifyBrandString(i->brand); + +#endif // ASMJIT_X86 || ASMJIT_X64 +} +#else +void detectCpuInfo(CpuInfo* i) ASMJIT_NOTHROW +{ + memset(i, 0, sizeof(CpuInfo)); +} +#endif + +struct ASMJIT_HIDDEN CpuInfoStatic +{ + CpuInfoStatic() ASMJIT_NOTHROW { detectCpuInfo(&i); } + + CpuInfo i; +}; + +CpuInfo* getCpuInfo() ASMJIT_NOTHROW +{ + static CpuInfoStatic i; + return &i.i; +} + +} // AsmJit + +// [Api-End] +#include "ApiEnd.h" diff --git a/lib/AsmJit/CpuInfo.h b/lib/AsmJit/CpuInfo.h new file mode 100644 index 0000000..91674d6 --- /dev/null +++ b/lib/AsmJit/CpuInfo.h @@ -0,0 +1,245 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +// [Guard] +#ifndef _ASMJIT_CPUINFO_H +#define _ASMJIT_CPUINFO_H + +// [Dependencies] +#include "Build.h" + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +//! @addtogroup AsmJit_CpuInfo +//! @{ + +// ============================================================================ +// [AsmJit::CpuId] +// ============================================================================ + +#if defined(ASMJIT_X86) || defined(ASMJIT_X64) +//! @brief Structure (union) used by cpuid() function. +union CpuId +{ + //! @brief cpuid results array(eax, ebx, ecx and edx registers). + uint32_t i[4]; + + struct + { + //! @brief cpuid result in eax register. + uint32_t eax; + //! @brief cpuid result in ebx register. + uint32_t ebx; + //! @brief cpuid result in ecx register. + uint32_t ecx; + //! @brief cpuid result in edx register. + uint32_t edx; + }; +}; + +//! @brief Calls CPUID instruction with eax == @a in and returns result to @a out. +//! +//! @c cpuid() function has one input parameter that is passed to cpuid through +//! eax register and results in four output values representing result of cpuid +//! instruction (eax, ebx, ecx and edx registers). +ASMJIT_API void cpuid(uint32_t in, CpuId* out) ASMJIT_NOTHROW; +#endif // ASMJIT_X86 || ASMJIT_X64 + +// ============================================================================ +// [AsmJit::CPU_VENDOR] +// ============================================================================ + +//! @brief Cpu vendor IDs. +//! +//! Cpu vendor IDs are specific for AsmJit library. Vendor ID is not directly +//! read from cpuid result, instead it's based on CPU vendor string. +enum CPU_VENDOR +{ + //! @brief Unknown CPU vendor. + CPU_VENDOR_UNKNOWN = 0, + + //! @brief Intel CPU vendor. + CPU_VENDOR_INTEL = 1, + //! @brief AMD CPU vendor. + CPU_VENDOR_AMD = 2, + //! @brief National Semiconductor CPU vendor (applies also to Cyrix processors). + CPU_VENDOR_NSM = 3, + //! @brief Transmeta CPU vendor. 
+ CPU_VENDOR_TRANSMETA = 4,
+ //! @brief VIA CPU vendor.
+ CPU_VENDOR_VIA = 5
+};
+
+// ============================================================================
+// [AsmJit::CPU_FEATURE]
+// ============================================================================
+
+//! @brief X86/X64 CPU features.
+enum CPU_FEATURE
+{
+ //! @brief Cpu has RDTSC instruction.
+ CPU_FEATURE_RDTSC = 1U << 0,
+ //! @brief Cpu has RDTSCP instruction.
+ CPU_FEATURE_RDTSCP = 1U << 1,
+ //! @brief Cpu has CMOV instruction (conditional move)
+ CPU_FEATURE_CMOV = 1U << 2,
+ //! @brief Cpu has CMPXCHG8B instruction
+ CPU_FEATURE_CMPXCHG8B = 1U << 3,
+ //! @brief Cpu has CMPXCHG16B instruction (64-bit processors)
+ CPU_FEATURE_CMPXCHG16B = 1U << 4,
+ //! @brief Cpu has CLFLUSH instruction
+ CPU_FEATURE_CLFLUSH = 1U << 5,
+ //! @brief Cpu has PREFETCH instruction
+ CPU_FEATURE_PREFETCH = 1U << 6,
+ //! @brief Cpu supports LAHF and SAHF instructions.
+ CPU_FEATURE_LAHF_SAHF = 1U << 7,
+ //! @brief Cpu supports FXSAVE and FXRSTOR instructions.
+ CPU_FEATURE_FXSR = 1U << 8,
+ //! @brief Cpu supports FXSAVE and FXRSTOR instruction optimizations (FFXSR).
+ CPU_FEATURE_FFXSR = 1U << 9,
+ //! @brief Cpu has MMX.
+ CPU_FEATURE_MMX = 1U << 10,
+ //! @brief Cpu has extended MMX.
+ CPU_FEATURE_MMX_EXT = 1U << 11,
+ //! @brief Cpu has 3dNow!
+ CPU_FEATURE_3DNOW = 1U << 12,
+ //! @brief Cpu has enhanced 3dNow!
+ CPU_FEATURE_3DNOW_EXT = 1U << 13,
+ //! @brief Cpu has SSE.
+ CPU_FEATURE_SSE = 1U << 14,
+ //! @brief Cpu has SSE2.
+ CPU_FEATURE_SSE2 = 1U << 15,
+ //! @brief Cpu has SSE3.
+ CPU_FEATURE_SSE3 = 1U << 16,
+ //! @brief Cpu has Supplemental SSE3 (SSSE3).
+ CPU_FEATURE_SSSE3 = 1U << 17,
+ //! @brief Cpu has SSE4.A.
+ CPU_FEATURE_SSE4_A = 1U << 18,
+ //! @brief Cpu has SSE4.1.
+ CPU_FEATURE_SSE4_1 = 1U << 19,
+ //! @brief Cpu has SSE4.2.
+ CPU_FEATURE_SSE4_2 = 1U << 20,
+ //! @brief Cpu has AVX.
+ CPU_FEATURE_AVX = 1U << 22,
+ //! @brief Cpu has Misaligned SSE (MSSE). 
+ CPU_FEATURE_MSSE = 1U << 23, + //! @brief Cpu supports MONITOR and MWAIT instructions. + CPU_FEATURE_MONITOR_MWAIT = 1U << 24, + //! @brief Cpu supports MOVBE instruction. + CPU_FEATURE_MOVBE = 1U << 25, + //! @brief Cpu supports POPCNT instruction. + CPU_FEATURE_POPCNT = 1U << 26, + //! @brief Cpu supports LZCNT instruction. + CPU_FEATURE_LZCNT = 1U << 27, + //! @brief Cpu supports PCLMULDQ set of instructions. + CPU_FEATURE_PCLMULDQ = 1U << 28, + //! @brief Cpu supports multithreading. + CPU_FEATURE_MULTI_THREADING = 1U << 29, + //! @brief Cpu supports execute disable bit (execute protection). + CPU_FEATURE_EXECUTE_DISABLE_BIT = 1U << 30, + //! @brief 64-bit CPU. + CPU_FEATURE_64_BIT = 1U << 31 +}; + +// ============================================================================ +// [AsmJit::CPU_BUG] +// ============================================================================ + +//! @brief X86/X64 CPU bugs. +enum CPU_BUG +{ + //! @brief Whether the processor contains bug seen in some + //! AMD-Opteron processors. + CPU_BUG_AMD_LOCK_MB = 1U << 0 +}; + +// ============================================================================ +// [AsmJit::CpuInfo] +// ============================================================================ + +//! @brief Informations about host cpu. +struct ASMJIT_HIDDEN CpuInfo +{ + //! @brief Cpu short vendor string. + char vendor[16]; + //! @brief Cpu long vendor string (brand). + char brand[64]; + //! @brief Cpu vendor id (see @c AsmJit::CpuInfo::VendorId enum). + uint32_t vendorId; + //! @brief Cpu family ID. + uint32_t family; + //! @brief Cpu model ID. + uint32_t model; + //! @brief Cpu stepping. + uint32_t stepping; + //! @brief Number of processors or cores. + uint32_t numberOfProcessors; + //! @brief Cpu features bitfield, see @c AsmJit::CpuInfo::Feature enum). + uint32_t features; + //! @brief Cpu bugs bitfield, see @c AsmJit::CpuInfo::Bug enum). + uint32_t bugs; + +#if defined(ASMJIT_X86) || defined(ASMJIT_X64) + //! 
@brief Extended information for x86/x64 compatible processors. + struct X86ExtendedInfo + { + //! @brief Processor type. + uint32_t processorType; + //! @brief Brand index. + uint32_t brandIndex; + //! @brief Flush cache line size in bytes. + uint32_t flushCacheLineSize; + //! @brief Maximum number of addressable IDs for logical processors. + uint32_t maxLogicalProcessors; + //! @brief Initial APIC ID. + uint32_t apicPhysicalId; + }; + //! @brief Extended information for x86/x64 compatible processors. + X86ExtendedInfo x86ExtendedInfo; +#endif // ASMJIT_X86 || ASMJIT_X64 +}; + +//! @brief Detect CPU features to CpuInfo structure @a i. +//! +//! @sa @c CpuInfo. +ASMJIT_API void detectCpuInfo(CpuInfo* i) ASMJIT_NOTHROW; + +//! @brief Return CpuInfo (detection is done only once). +//! +//! @sa @c CpuInfo. +ASMJIT_API CpuInfo* getCpuInfo() ASMJIT_NOTHROW; + +//! @} + +} // AsmJit namespace + +// [Api-End] +#include "ApiEnd.h" + +// [Guard] +#endif // _ASMJIT_CPUINFO_H diff --git a/lib/AsmJit/Defs.cpp b/lib/AsmJit/Defs.cpp new file mode 100644 index 0000000..cfb6d63 --- /dev/null +++ b/lib/AsmJit/Defs.cpp @@ -0,0 +1,68 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Dependencies] +#include "Defs.h" + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +const char* getErrorCodeAsString(uint32_t error) ASMJIT_NOTHROW +{ + static const char* errorMessage[] = { + "No error", + + "No heap memory", + "No virtual memory", + + "Unknown instruction", + "Illegal instruction", + "Illegal addressing", + "Illegal short jump", + + "No function defined", + "Incomplete function", + + "Not enough registers", + "Registers overlap", + + "Incompatible argument", + "Incompatible return value", + + "Unknown error" + }; + + // Saturate error code to be able to use errorMessage[]. + if (error > _ERROR_COUNT) error = _ERROR_COUNT; + + return errorMessage[error]; +} + +} // AsmJit + +// [Api-End] +#include "ApiEnd.h" diff --git a/lib/AsmJit/Defs.h b/lib/AsmJit/Defs.h new file mode 100644 index 0000000..2b488d3 --- /dev/null +++ b/lib/AsmJit/Defs.h @@ -0,0 +1,427 @@ +// AsmJit - Complete JIT Assembler for C++ Language. 
+ +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Guard] +#ifndef _ASMJIT_DEFS_H +#define _ASMJIT_DEFS_H + +// [Dependencies] +#include "Build.h" + +namespace AsmJit { + +//! @addtogroup AsmJit_Core +//! @{ + +// ============================================================================ +// [AsmJit::MEMORY_ALLOC_TYPE] +// ============================================================================ + +//! @brief Types of allocation used by @c AsmJit::MemoryManager::alloc() method. +enum MEMORY_ALLOC_TYPE +{ + //! @brief Allocate memory that can be freed by @c AsmJit::MemoryManager::free() + //! method. + MEMORY_ALLOC_FREEABLE, + //! @brief Allocate permanent memory that will be never freed. 
+ MEMORY_ALLOC_PERMANENT +}; + +// ============================================================================ +// [AsmJit::ERROR_CODE] +// ============================================================================ + +//! @brief AsmJit error codes. +enum ERROR_CODE +{ + //! @brief No error (success). + //! + //! This is default state and state you want. + ERROR_NONE = 0, + + //! @brief Memory allocation error (@c ASMJIT_MALLOC returned @c NULL). + ERROR_NO_HEAP_MEMORY = 1, + //! @brief Virtual memory allocation error (@c VirtualMemory returned @c NULL). + ERROR_NO_VIRTUAL_MEMORY = 2, + + //! @brief Unknown instruction. This happens only if instruction code is + //! out of bounds. Shouldn't happen. + ERROR_UNKNOWN_INSTRUCTION = 3, + //! @brief Illegal instruction, usually generated by AsmJit::AssemblerCore + //! class when emitting instruction opcode. If this error is generated the + //! target buffer is not affected by this invalid instruction. + //! + //! You can also get this error code if you are under x64 (64-bit x86) and + //! you tried to decode instruction using AH, BH, CH or DH register with REX + //! prefix. These registers can't be accessed if REX prefix is used and AsmJit + //! didn't check for this situation in intrinsics (@c Compiler takes care of + //! this and rearrange registers if needed). + //! + //! Examples that will raise @c ERROR_ILLEGAL_INSTRUCTION error (a is + //! @c Assembler instance): + //! + //! @code + //! a.mov(dword_ptr(eax), al); // Invalid address size. + //! a.mov(byte_ptr(r10), ah); // Undecodable instruction (AH used with r10 + //! // which can be encoded only using REX prefix) + //! @endcode + //! + //! @note In debug mode you get assertion failure instead of setting error + //! code. + ERROR_ILLEGAL_INSTRUCTION = 4, + //! @brief Illegal addressing used (unencodable). + ERROR_ILLEGAL_ADDRESING = 5, + //! @brief Short jump instruction used, but displacement is out of bounds. + ERROR_ILLEGAL_SHORT_JUMP = 6, + + //! 
@brief No function defined. + ERROR_NO_FUNCTION = 7, + //! @brief Function generation is not finished by using @c Compiler::endFunction() + //! or something bad happened during generation related to function. This can + //! be missing emittable, etc... + ERROR_INCOMPLETE_FUNCTION = 8, + + //! @brief Compiler can't allocate registers, because all of them are used. + //! + //! @note AsmJit is able to spill registers so this error really shouldn't + //! happen unless all registers have priority 0 (which means never spill). + ERROR_NOT_ENOUGH_REGISTERS = 9, + //! @brief Compiler can't allocate one register to multiple destinations. + //! + //! This error can only happen using special instructions like cmpxchg8b and + //! others where there are more destination operands (implicit). + ERROR_REGISTERS_OVERLAP = 10, + + //! @brief Tried to call function using incompatible argument. + ERROR_INCOMPATIBLE_ARGUMENT = 11, + //! @brief Incompatible return value. + ERROR_INCOMPATIBLE_RETURN_VALUE = 12, + + //! @brief Count of error codes by AsmJit. Can grow in future. + _ERROR_COUNT +}; + +// ============================================================================ +// [AsmJit::OPERAND_TYPE] +// ============================================================================ + +//! @brief Operand types that can be encoded in @c Op operand. +enum OPERAND_TYPE +{ + //! @brief Operand is none, used only internally (not initialized Operand). + //! + //! This operand is not valid. + OPERAND_NONE = 0x00, + + //! @brief Operand is register. + OPERAND_REG = 0x01, + //! @brief Operand is memory. + OPERAND_MEM = 0x02, + //! @brief Operand is immediate. + OPERAND_IMM = 0x04, + //! @brief Operand is label. + OPERAND_LABEL = 0x08, + //! @brief Operand is variable. + OPERAND_VAR = 0x10 +}; + +// ============================================================================ +// [AsmJit::OPERAND_MEM_TYPE] +// ============================================================================ + +//! 
@brief Type of memory operand. +enum OPERAND_MEM_TYPE +{ + //! @brief Operand is combination of register(s) and displacement (native). + OPERAND_MEM_NATIVE = 0, + //! @brief Operand is label. + OPERAND_MEM_LABEL = 1, + //! @brief Operand is absolute memory location (supported mainly in 32-bit + //! mode) + OPERAND_MEM_ABSOLUTE = 2, +}; + +// ============================================================================ +// [AsmJit::PROPERTY] +// ============================================================================ + +//! @brief @c Assembler/Compiler properties. +enum PROPERTY +{ + //! @brief Optimize align for current processor. + //! + //! Default: @c true. + PROPERTY_OPTIMIZE_ALIGN = 0, + + //! @brief Emit hints added to jcc() instructions. + //! + //! Default: @c true. + //! + //! @note This is X86/X64 property only. + PROPERTY_JUMP_HINTS = 1 +}; + +// ============================================================================ +// [AsmJit::SIZE] +// ============================================================================ + +//! @brief Size of registers and pointers. +enum SIZE +{ + //! @brief 1 byte size. + SIZE_BYTE = 1, + //! @brief 2 bytes size. + SIZE_WORD = 2, + //! @brief 4 bytes size. + SIZE_DWORD = 4, + //! @brief 8 bytes size. + SIZE_QWORD = 8, + //! @brief 10 bytes size. + SIZE_TWORD = 10, + //! @brief 16 bytes size. + SIZE_DQWORD = 16 +}; + +// ============================================================================ +// [EMITTABLE_TYPE] +// ============================================================================ + +//! @brief Emmitable type. +//! +//! For each emittable that is used by @c Compiler must be defined it's type. +//! Compiler can optimize instruction stream by analyzing emittables and each +//! type is hint for it. The most used emittables are instructions +//! (@c EMITTABLE_INSTRUCTION). +enum EMITTABLE_TYPE +{ + //! @brief Emittable is invalid (can't be used). + EMITTABLE_NONE = 0, + //! 
@brief Emittable is dummy (used as a mark) (@ref EDummy). + EMITTABLE_DUMMY, + //! @brief Emittable is comment (no code) (@ref EComment). + EMITTABLE_COMMENT, + //! @brief Emittable is embedded data (@ref EData). + EMITTABLE_EMBEDDED_DATA, + //! @brief Emittable is .align directive (@ref EAlign). + EMITTABLE_ALIGN, + //! @brief Emittable is variable hint (alloc, spill, use, unuse, ...) (@ref EVariableHint). + EMITTABLE_VARIABLE_HINT, + //! @brief Emittable is single instruction (@ref EInstruction). + EMITTABLE_INSTRUCTION, + //! @brief Emittable is block of instructions. + EMITTABLE_BLOCK, + //! @brief Emittable is function declaration (@ref EFunction). + EMITTABLE_FUNCTION, + //! @brief Emittable is function prolog (@ref EProlog). + EMITTABLE_PROLOG, + //! @brief Emittable is function epilog (@ref EEpilog). + EMITTABLE_EPILOG, + //! @brief Emittable is end of function (@ref EFunctionEnd). + EMITTABLE_FUNCTION_END, + //! @brief Emittable is target (bound label). + EMITTABLE_TARGET, + //! @brief Emittable is jump table (@ref EJmp). + EMITTABLE_JUMP_TABLE, + //! @brief Emittable is function call (@ref ECall). + EMITTABLE_CALL, + //! @brief Emittable is return (@ref ERet). + EMITTABLE_RET +}; + +// ============================================================================ +// [AsmJit::VARIABLE_STATE] +// ============================================================================ + +//! @brief State of variable. +//! +//! @note State of variable is used only during make process and it's not +//! visible to the developer. +enum VARIABLE_STATE +{ + //! @brief Variable is currently not used. + VARIABLE_STATE_UNUSED = 0, + + //! @brief Variable is in register. + //! + //! Variable is currently allocated in register. + VARIABLE_STATE_REGISTER = 1, + + //! @brief Variable is in memory location or spilled. + //! + //! Variable was spilled from register to memory or variable is used for + //! memory only storage. 
+ VARIABLE_STATE_MEMORY = 2 +}; + +// ============================================================================ +// [AsmJit::VARIABLE_ALLOC_FLAGS] +// ============================================================================ + +//! @brief Variable alloc mode. +enum VARIABLE_ALLOC +{ + //! @brief Allocating variable to read only. + //! + //! Read only variables are used to optimize variable spilling. If variable + //! is some time ago deallocated and it's not marked as changed (so it was + //! all the life time read only) then spill is simply NOP (no mov instruction + //! is generated to move it to it's home memory location). + VARIABLE_ALLOC_READ = 0x01, + + //! @brief Allocating variable to write only (overwrite). + //! + //! Overwriting means that if variable is in memory, there is no generated + //! instruction to move variable from memory to register, because that + //! register will be overwritten by next instruction. This is used as a + //! simple optimization to improve generated code by @c Compiler. + VARIABLE_ALLOC_WRITE = 0x02, + + //! @brief Allocating variable to read / write. + //! + //! Variable allocated for read / write is marked as changed. This means that + //! if variable must be later spilled into memory, mov (or similar) + //! instruction will be generated. + VARIABLE_ALLOC_READWRITE = 0x03, + + //! @brief Variable can be allocated in register. + VARIABLE_ALLOC_REGISTER = 0x04, + + //! @brief Variable can be allocated in memory. + VARIABLE_ALLOC_MEMORY = 0x08, + + //! @brief Unuse the variable after use. + VARIABLE_ALLOC_UNUSE_AFTER_USE = 0x10, + + //! @brief Variable can be allocated only to one register (special allocation). + VARIABLE_ALLOC_SPECIAL = 0x20 +}; + +// ============================================================================ +// [AsmJit::VARIABLE_ALLOC_POLICY] +// ============================================================================ + +//! @brief Variable allocation method. +//! +//! 
Variable allocation method is used by compiler and it means if compiler +//! should first allocate preserved registers or not. Preserved registers are +//! registers that must be saved / restored by generated function. +//! +//! This option is for people who are calling C/C++ functions from JIT code so +//! Compiler can recude generating push/pop sequences before and after call, +//! respectively. +enum VARIABLE_ALLOC_POLICY +{ + //! @brief Allocate preserved registers first. + VARIABLE_ALLOC_PRESERVED_FIRST, + //! @brief Allocate preserved registers last (default). + VARIABLE_ALLOC_PRESERVED_LAST +}; + +// ============================================================================ +// [AsmJit::FUNCTION_HINT] +// ============================================================================ + +//! @brief Function hints. +enum FUNCTION_HINT +{ + //! @brief Use push/pop sequences instead of mov sequences in function prolog + //! and epilog. + FUNCTION_HINT_PUSH_POP_SEQUENCE = 0, + //! @brief Make naked function (without using ebp/erp in prolog / epilog). + FUNCTION_HINT_NAKED = 1, + //! @brief Add emms instruction to the function epilog. + FUNCTION_HINT_EMMS = 2, + //! @brief Add sfence instruction to the function epilog. + FUNCTION_HINT_SFENCE = 3, + //! @brief Add lfence instruction to the function epilog. + FUNCTION_HINT_LFENCE = 4 +}; + +// ============================================================================ +// [AsmJit::ARGUMENT_DIR] +// ============================================================================ + +//! @brief Arguments direction used by @c Function. +enum ARGUMENT_DIR +{ + //! @brief Arguments are passed left to right. + //! + //! This arguments direction is unusual to C programming, it's used by pascal + //! compilers and in some calling conventions by Borland compiler). + ARGUMENT_DIR_LEFT_TO_RIGHT = 0, + //! @brief Arguments are passer right ro left + //! + //! This is default argument direction in C programming. 
+ ARGUMENT_DIR_RIGHT_TO_LEFT = 1 +}; + +// ============================================================================ +// [AsmJit::Constants] +// ============================================================================ + +enum { + //! @brief Invalid operand identifier. + INVALID_VALUE = 0xFFFFFFFF, + + //! @brief Operand id value mask (part used for IDs). + OPERAND_ID_VALUE_MASK = 0x3FFFFFFF, + //! @brief Operand id type mask (part used for operand type). + OPERAND_ID_TYPE_MASK = 0xC0000000, + //! @brief Label operand mark id. + OPERAND_ID_TYPE_LABEL = 0x40000000, + //! @brief Variable operand mark id. + OPERAND_ID_TYPE_VAR = 0x80000000, +}; + +enum { + //! @brief Maximum allowed arguments per function declaration / call. + FUNC_MAX_ARGS = 32 +}; + +// ============================================================================ +// [AsmJit::API] +// ============================================================================ + +//! @brief Translates error code (see @c ERROR_CODE) into text representation. +ASMJIT_API const char* getErrorCodeAsString(uint32_t error) ASMJIT_NOTHROW; + +//! @} + +} // AsmJit namespace + +// ============================================================================ +// [Platform Specific] +// +// Following enums must be declared by platform specific header: +// - CALL_CONV - Calling convention. +// - VARIABLE_TYPE - Variable type. +// ============================================================================ + +#if defined(ASMJIT_X86) || defined(ASMJIT_X64) +#include "DefsX86X64.h" +#endif // ASMJIT_X86 || ASMJIT_X64 + +// [Guard] +#endif // _ASMJIT_DEFS_H diff --git a/lib/AsmJit/DefsX86X64.cpp b/lib/AsmJit/DefsX86X64.cpp new file mode 100644 index 0000000..072d96b --- /dev/null +++ b/lib/AsmJit/DefsX86X64.cpp @@ -0,0 +1,1859 @@ +// AsmJit - Complete JIT Assembler for C++ Language. 
+ +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +// [Dependencies] +#include "Defs.h" + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +// ============================================================================ +// [AsmJit::ConditionToInstruction] +// ============================================================================ + +const uint32_t ConditionToInstruction::_jcctable[16] = +{ + INST_JO, + INST_JNO, + INST_JB, + INST_JAE, + INST_JE, + INST_JNE, + INST_JBE, + INST_JA, + INST_JS, + INST_JNS, + INST_JPE, + INST_JPO, + INST_JL, + INST_JGE, + INST_JLE, + INST_JG +}; + +const uint32_t ConditionToInstruction::_cmovcctable[16] = +{ + INST_CMOVO, + INST_CMOVNO, + INST_CMOVB, + INST_CMOVAE, + INST_CMOVE, + INST_CMOVNE, + INST_CMOVBE, + INST_CMOVA, + INST_CMOVS, + INST_CMOVNS, + INST_CMOVPE, + INST_CMOVPO, + INST_CMOVL, + INST_CMOVGE, + INST_CMOVLE, + INST_CMOVG +}; + +const uint32_t ConditionToInstruction::_setcctable[16] = +{ + INST_SETO, + INST_SETNO, + INST_SETB, + INST_SETAE, + INST_SETE, + INST_SETNE, + INST_SETBE, + INST_SETA, + INST_SETS, + INST_SETNS, + INST_SETPE, + INST_SETPO, + INST_SETL, + INST_SETGE, + INST_SETLE, + INST_SETG +}; + +// ============================================================================ +// [AsmJit::Instruction Name] +// ============================================================================ + +// Following {DATA SECTION} is auto-generated using InstructionDescription data. 
+// +// ${INSTRUCTION_DATA_BEGIN} +const char instructionName[] = + "adc\0" + "add\0" + "addpd\0" + "addps\0" + "addsd\0" + "addss\0" + "addsubpd\0" + "addsubps\0" + "amd_prefetch\0" + "amd_prefetchw\0" + "and\0" + "andnpd\0" + "andnps\0" + "andpd\0" + "andps\0" + "blendpd\0" + "blendps\0" + "blendvpd\0" + "blendvps\0" + "bsf\0" + "bsr\0" + "bswap\0" + "bt\0" + "btc\0" + "btr\0" + "bts\0" + "call\0" + "cbw\0" + "cdqe\0" + "clc\0" + "cld\0" + "clflush\0" + "cmc\0" + "cmova\0" + "cmovae\0" + "cmovb\0" + "cmovbe\0" + "cmovc\0" + "cmove\0" + "cmovg\0" + "cmovge\0" + "cmovl\0" + "cmovle\0" + "cmovna\0" + "cmovnae\0" + "cmovnb\0" + "cmovnbe\0" + "cmovnc\0" + "cmovne\0" + "cmovng\0" + "cmovnge\0" + "cmovnl\0" + "cmovnle\0" + "cmovno\0" + "cmovnp\0" + "cmovns\0" + "cmovnz\0" + "cmovo\0" + "cmovp\0" + "cmovpe\0" + "cmovpo\0" + "cmovs\0" + "cmovz\0" + "cmp\0" + "cmppd\0" + "cmpps\0" + "cmpsd\0" + "cmpss\0" + "cmpxchg\0" + "cmpxchg16b\0" + "cmpxchg8b\0" + "comisd\0" + "comiss\0" + "cpuid\0" + "crc32\0" + "cvtdq2pd\0" + "cvtdq2ps\0" + "cvtpd2dq\0" + "cvtpd2pi\0" + "cvtpd2ps\0" + "cvtpi2pd\0" + "cvtpi2ps\0" + "cvtps2dq\0" + "cvtps2pd\0" + "cvtps2pi\0" + "cvtsd2si\0" + "cvtsd2ss\0" + "cvtsi2sd\0" + "cvtsi2ss\0" + "cvtss2sd\0" + "cvtss2si\0" + "cvttpd2dq\0" + "cvttpd2pi\0" + "cvttps2dq\0" + "cvttps2pi\0" + "cvttsd2si\0" + "cvttss2si\0" + "cwde\0" + "daa\0" + "das\0" + "dec\0" + "div\0" + "divpd\0" + "divps\0" + "divsd\0" + "divss\0" + "dppd\0" + "dpps\0" + "emms\0" + "enter\0" + "extractps\0" + "f2xm1\0" + "fabs\0" + "fadd\0" + "faddp\0" + "fbld\0" + "fbstp\0" + "fchs\0" + "fclex\0" + "fcmovb\0" + "fcmovbe\0" + "fcmove\0" + "fcmovnb\0" + "fcmovnbe\0" + "fcmovne\0" + "fcmovnu\0" + "fcmovu\0" + "fcom\0" + "fcomi\0" + "fcomip\0" + "fcomp\0" + "fcompp\0" + "fcos\0" + "fdecstp\0" + "fdiv\0" + "fdivp\0" + "fdivr\0" + "fdivrp\0" + "femms\0" + "ffree\0" + "fiadd\0" + "ficom\0" + "ficomp\0" + "fidiv\0" + "fidivr\0" + "fild\0" + "fimul\0" + "fincstp\0" + "finit\0" + "fist\0" + "fistp\0" + 
"fisttp\0" + "fisub\0" + "fisubr\0" + "fld\0" + "fld1\0" + "fldcw\0" + "fldenv\0" + "fldl2e\0" + "fldl2t\0" + "fldlg2\0" + "fldln2\0" + "fldpi\0" + "fldz\0" + "fmul\0" + "fmulp\0" + "fnclex\0" + "fninit\0" + "fnop\0" + "fnsave\0" + "fnstcw\0" + "fnstenv\0" + "fnstsw\0" + "fpatan\0" + "fprem\0" + "fprem1\0" + "fptan\0" + "frndint\0" + "frstor\0" + "fsave\0" + "fscale\0" + "fsin\0" + "fsincos\0" + "fsqrt\0" + "fst\0" + "fstcw\0" + "fstenv\0" + "fstp\0" + "fstsw\0" + "fsub\0" + "fsubp\0" + "fsubr\0" + "fsubrp\0" + "ftst\0" + "fucom\0" + "fucomi\0" + "fucomip\0" + "fucomp\0" + "fucompp\0" + "fwait\0" + "fxam\0" + "fxch\0" + "fxrstor\0" + "fxsave\0" + "fxtract\0" + "fyl2x\0" + "fyl2xp1\0" + "haddpd\0" + "haddps\0" + "hsubpd\0" + "hsubps\0" + "idiv\0" + "imul\0" + "inc\0" + "int3\0" + "ja\0" + "jae\0" + "jb\0" + "jbe\0" + "jc\0" + "je\0" + "jg\0" + "jge\0" + "jl\0" + "jle\0" + "jna\0" + "jnae\0" + "jnb\0" + "jnbe\0" + "jnc\0" + "jne\0" + "jng\0" + "jnge\0" + "jnl\0" + "jnle\0" + "jno\0" + "jnp\0" + "jns\0" + "jnz\0" + "jo\0" + "jp\0" + "jpe\0" + "jpo\0" + "js\0" + "jz\0" + "jmp\0" + "lddqu\0" + "ldmxcsr\0" + "lahf\0" + "lea\0" + "leave\0" + "lfence\0" + "maskmovdqu\0" + "maskmovq\0" + "maxpd\0" + "maxps\0" + "maxsd\0" + "maxss\0" + "mfence\0" + "minpd\0" + "minps\0" + "minsd\0" + "minss\0" + "monitor\0" + "mov\0" + "movapd\0" + "movaps\0" + "movbe\0" + "movd\0" + "movddup\0" + "movdq2q\0" + "movdqa\0" + "movdqu\0" + "movhlps\0" + "movhpd\0" + "movhps\0" + "movlhps\0" + "movlpd\0" + "movlps\0" + "movmskpd\0" + "movmskps\0" + "movntdq\0" + "movntdqa\0" + "movnti\0" + "movntpd\0" + "movntps\0" + "movntq\0" + "movq\0" + "movq2dq\0" + "movsd\0" + "movshdup\0" + "movsldup\0" + "movss\0" + "movsx\0" + "movsxd\0" + "movupd\0" + "movups\0" + "movzx\0" + "mov_ptr\0" + "mpsadbw\0" + "mul\0" + "mulpd\0" + "mulps\0" + "mulsd\0" + "mulss\0" + "mwait\0" + "neg\0" + "nop\0" + "not\0" + "or\0" + "orpd\0" + "orps\0" + "pabsb\0" + "pabsd\0" + "pabsw\0" + "packssdw\0" + "packsswb\0" + 
"packusdw\0" + "packuswb\0" + "paddb\0" + "paddd\0" + "paddq\0" + "paddsb\0" + "paddsw\0" + "paddusb\0" + "paddusw\0" + "paddw\0" + "palignr\0" + "pand\0" + "pandn\0" + "pause\0" + "pavgb\0" + "pavgw\0" + "pblendvb\0" + "pblendw\0" + "pcmpeqb\0" + "pcmpeqd\0" + "pcmpeqq\0" + "pcmpeqw\0" + "pcmpestri\0" + "pcmpestrm\0" + "pcmpgtb\0" + "pcmpgtd\0" + "pcmpgtq\0" + "pcmpgtw\0" + "pcmpistri\0" + "pcmpistrm\0" + "pextrb\0" + "pextrd\0" + "pextrq\0" + "pextrw\0" + "pf2id\0" + "pf2iw\0" + "pfacc\0" + "pfadd\0" + "pfcmpeq\0" + "pfcmpge\0" + "pfcmpgt\0" + "pfmax\0" + "pfmin\0" + "pfmul\0" + "pfnacc\0" + "pfpnacc\0" + "pfrcp\0" + "pfrcpit1\0" + "pfrcpit2\0" + "pfrsqit1\0" + "pfrsqrt\0" + "pfsub\0" + "pfsubr\0" + "phaddd\0" + "phaddsw\0" + "phaddw\0" + "phminposuw\0" + "phsubd\0" + "phsubsw\0" + "phsubw\0" + "pi2fd\0" + "pi2fw\0" + "pinsrb\0" + "pinsrd\0" + "pinsrq\0" + "pinsrw\0" + "pmaddubsw\0" + "pmaddwd\0" + "pmaxsb\0" + "pmaxsd\0" + "pmaxsw\0" + "pmaxub\0" + "pmaxud\0" + "pmaxuw\0" + "pminsb\0" + "pminsd\0" + "pminsw\0" + "pminub\0" + "pminud\0" + "pminuw\0" + "pmovmskb\0" + "pmovsxbd\0" + "pmovsxbq\0" + "pmovsxbw\0" + "pmovsxdq\0" + "pmovsxwd\0" + "pmovsxwq\0" + "pmovzxbd\0" + "pmovzxbq\0" + "pmovzxbw\0" + "pmovzxdq\0" + "pmovzxwd\0" + "pmovzxwq\0" + "pmuldq\0" + "pmulhrsw\0" + "pmulhuw\0" + "pmulhw\0" + "pmulld\0" + "pmullw\0" + "pmuludq\0" + "pop\0" + "popad\0" + "popcnt\0" + "popfd\0" + "popfq\0" + "por\0" + "prefetch\0" + "psadbw\0" + "pshufb\0" + "pshufd\0" + "pshufw\0" + "pshufhw\0" + "pshuflw\0" + "psignb\0" + "psignd\0" + "psignw\0" + "pslld\0" + "pslldq\0" + "psllq\0" + "psllw\0" + "psrad\0" + "psraw\0" + "psrld\0" + "psrldq\0" + "psrlq\0" + "psrlw\0" + "psubb\0" + "psubd\0" + "psubq\0" + "psubsb\0" + "psubsw\0" + "psubusb\0" + "psubusw\0" + "psubw\0" + "pswapd\0" + "ptest\0" + "punpckhbw\0" + "punpckhdq\0" + "punpckhqdq\0" + "punpckhwd\0" + "punpcklbw\0" + "punpckldq\0" + "punpcklqdq\0" + "punpcklwd\0" + "push\0" + "pushad\0" + "pushfd\0" + "pushfq\0" + 
"pxor\0" + "rcl\0" + "rcpps\0" + "rcpss\0" + "rcr\0" + "rdtsc\0" + "rdtscp\0" + "rep lodsb\0" + "rep lodsd\0" + "rep lodsq\0" + "rep lodsw\0" + "rep movsb\0" + "rep movsd\0" + "rep movsq\0" + "rep movsw\0" + "rep stosb\0" + "rep stosd\0" + "rep stosq\0" + "rep stosw\0" + "repe cmpsb\0" + "repe cmpsd\0" + "repe cmpsq\0" + "repe cmpsw\0" + "repe scasb\0" + "repe scasd\0" + "repe scasq\0" + "repe scasw\0" + "repne cmpsb\0" + "repne cmpsd\0" + "repne cmpsq\0" + "repne cmpsw\0" + "repne scasb\0" + "repne scasd\0" + "repne scasq\0" + "repne scasw\0" + "ret\0" + "rol\0" + "ror\0" + "roundpd\0" + "roundps\0" + "roundsd\0" + "roundss\0" + "rsqrtps\0" + "rsqrtss\0" + "sahf\0" + "sal\0" + "sar\0" + "sbb\0" + "seta\0" + "setae\0" + "setb\0" + "setbe\0" + "setc\0" + "sete\0" + "setg\0" + "setge\0" + "setl\0" + "setle\0" + "setna\0" + "setnae\0" + "setnb\0" + "setnbe\0" + "setnc\0" + "setne\0" + "setng\0" + "setnge\0" + "setnl\0" + "setnle\0" + "setno\0" + "setnp\0" + "setns\0" + "setnz\0" + "seto\0" + "setp\0" + "setpe\0" + "setpo\0" + "sets\0" + "setz\0" + "sfence\0" + "shl\0" + "shld\0" + "shr\0" + "shrd\0" + "shufpd\0" + "shufps\0" + "sqrtpd\0" + "sqrtps\0" + "sqrtsd\0" + "sqrtss\0" + "stc\0" + "std\0" + "stmxcsr\0" + "sub\0" + "subpd\0" + "subps\0" + "subsd\0" + "subss\0" + "test\0" + "ucomisd\0" + "ucomiss\0" + "ud2\0" + "unpckhpd\0" + "unpckhps\0" + "unpcklpd\0" + "unpcklps\0" + "xadd\0" + "xchg\0" + "xor\0" + "xorpd\0" + "xorps\0" + ; + +#define INST_ADC_INDEX 0 +#define INST_ADD_INDEX 4 +#define INST_ADDPD_INDEX 8 +#define INST_ADDPS_INDEX 14 +#define INST_ADDSD_INDEX 20 +#define INST_ADDSS_INDEX 26 +#define INST_ADDSUBPD_INDEX 32 +#define INST_ADDSUBPS_INDEX 41 +#define INST_AMD_PREFETCH_INDEX 50 +#define INST_AMD_PREFETCHW_INDEX 63 +#define INST_AND_INDEX 77 +#define INST_ANDNPD_INDEX 81 +#define INST_ANDNPS_INDEX 88 +#define INST_ANDPD_INDEX 95 +#define INST_ANDPS_INDEX 101 +#define INST_BLENDPD_INDEX 107 +#define INST_BLENDPS_INDEX 115 +#define INST_BLENDVPD_INDEX 
123 +#define INST_BLENDVPS_INDEX 132 +#define INST_BSF_INDEX 141 +#define INST_BSR_INDEX 145 +#define INST_BSWAP_INDEX 149 +#define INST_BT_INDEX 155 +#define INST_BTC_INDEX 158 +#define INST_BTR_INDEX 162 +#define INST_BTS_INDEX 166 +#define INST_CALL_INDEX 170 +#define INST_CBW_INDEX 175 +#define INST_CDQE_INDEX 179 +#define INST_CLC_INDEX 184 +#define INST_CLD_INDEX 188 +#define INST_CLFLUSH_INDEX 192 +#define INST_CMC_INDEX 200 +#define INST_CMOVA_INDEX 204 +#define INST_CMOVAE_INDEX 210 +#define INST_CMOVB_INDEX 217 +#define INST_CMOVBE_INDEX 223 +#define INST_CMOVC_INDEX 230 +#define INST_CMOVE_INDEX 236 +#define INST_CMOVG_INDEX 242 +#define INST_CMOVGE_INDEX 248 +#define INST_CMOVL_INDEX 255 +#define INST_CMOVLE_INDEX 261 +#define INST_CMOVNA_INDEX 268 +#define INST_CMOVNAE_INDEX 275 +#define INST_CMOVNB_INDEX 283 +#define INST_CMOVNBE_INDEX 290 +#define INST_CMOVNC_INDEX 298 +#define INST_CMOVNE_INDEX 305 +#define INST_CMOVNG_INDEX 312 +#define INST_CMOVNGE_INDEX 319 +#define INST_CMOVNL_INDEX 327 +#define INST_CMOVNLE_INDEX 334 +#define INST_CMOVNO_INDEX 342 +#define INST_CMOVNP_INDEX 349 +#define INST_CMOVNS_INDEX 356 +#define INST_CMOVNZ_INDEX 363 +#define INST_CMOVO_INDEX 370 +#define INST_CMOVP_INDEX 376 +#define INST_CMOVPE_INDEX 382 +#define INST_CMOVPO_INDEX 389 +#define INST_CMOVS_INDEX 396 +#define INST_CMOVZ_INDEX 402 +#define INST_CMP_INDEX 408 +#define INST_CMPPD_INDEX 412 +#define INST_CMPPS_INDEX 418 +#define INST_CMPSD_INDEX 424 +#define INST_CMPSS_INDEX 430 +#define INST_CMPXCHG_INDEX 436 +#define INST_CMPXCHG16B_INDEX 444 +#define INST_CMPXCHG8B_INDEX 455 +#define INST_COMISD_INDEX 465 +#define INST_COMISS_INDEX 472 +#define INST_CPUID_INDEX 479 +#define INST_CRC32_INDEX 485 +#define INST_CVTDQ2PD_INDEX 491 +#define INST_CVTDQ2PS_INDEX 500 +#define INST_CVTPD2DQ_INDEX 509 +#define INST_CVTPD2PI_INDEX 518 +#define INST_CVTPD2PS_INDEX 527 +#define INST_CVTPI2PD_INDEX 536 +#define INST_CVTPI2PS_INDEX 545 +#define INST_CVTPS2DQ_INDEX 554 
+#define INST_CVTPS2PD_INDEX 563 +#define INST_CVTPS2PI_INDEX 572 +#define INST_CVTSD2SI_INDEX 581 +#define INST_CVTSD2SS_INDEX 590 +#define INST_CVTSI2SD_INDEX 599 +#define INST_CVTSI2SS_INDEX 608 +#define INST_CVTSS2SD_INDEX 617 +#define INST_CVTSS2SI_INDEX 626 +#define INST_CVTTPD2DQ_INDEX 635 +#define INST_CVTTPD2PI_INDEX 645 +#define INST_CVTTPS2DQ_INDEX 655 +#define INST_CVTTPS2PI_INDEX 665 +#define INST_CVTTSD2SI_INDEX 675 +#define INST_CVTTSS2SI_INDEX 685 +#define INST_CWDE_INDEX 695 +#define INST_DAA_INDEX 700 +#define INST_DAS_INDEX 704 +#define INST_DEC_INDEX 708 +#define INST_DIV_INDEX 712 +#define INST_DIVPD_INDEX 716 +#define INST_DIVPS_INDEX 722 +#define INST_DIVSD_INDEX 728 +#define INST_DIVSS_INDEX 734 +#define INST_DPPD_INDEX 740 +#define INST_DPPS_INDEX 745 +#define INST_EMMS_INDEX 750 +#define INST_ENTER_INDEX 755 +#define INST_EXTRACTPS_INDEX 761 +#define INST_F2XM1_INDEX 771 +#define INST_FABS_INDEX 777 +#define INST_FADD_INDEX 782 +#define INST_FADDP_INDEX 787 +#define INST_FBLD_INDEX 793 +#define INST_FBSTP_INDEX 798 +#define INST_FCHS_INDEX 804 +#define INST_FCLEX_INDEX 809 +#define INST_FCMOVB_INDEX 815 +#define INST_FCMOVBE_INDEX 822 +#define INST_FCMOVE_INDEX 830 +#define INST_FCMOVNB_INDEX 837 +#define INST_FCMOVNBE_INDEX 845 +#define INST_FCMOVNE_INDEX 854 +#define INST_FCMOVNU_INDEX 862 +#define INST_FCMOVU_INDEX 870 +#define INST_FCOM_INDEX 877 +#define INST_FCOMI_INDEX 882 +#define INST_FCOMIP_INDEX 888 +#define INST_FCOMP_INDEX 895 +#define INST_FCOMPP_INDEX 901 +#define INST_FCOS_INDEX 908 +#define INST_FDECSTP_INDEX 913 +#define INST_FDIV_INDEX 921 +#define INST_FDIVP_INDEX 926 +#define INST_FDIVR_INDEX 932 +#define INST_FDIVRP_INDEX 938 +#define INST_FEMMS_INDEX 945 +#define INST_FFREE_INDEX 951 +#define INST_FIADD_INDEX 957 +#define INST_FICOM_INDEX 963 +#define INST_FICOMP_INDEX 969 +#define INST_FIDIV_INDEX 976 +#define INST_FIDIVR_INDEX 982 +#define INST_FILD_INDEX 989 +#define INST_FIMUL_INDEX 994 +#define 
INST_FINCSTP_INDEX 1000 +#define INST_FINIT_INDEX 1008 +#define INST_FIST_INDEX 1014 +#define INST_FISTP_INDEX 1019 +#define INST_FISTTP_INDEX 1025 +#define INST_FISUB_INDEX 1032 +#define INST_FISUBR_INDEX 1038 +#define INST_FLD_INDEX 1045 +#define INST_FLD1_INDEX 1049 +#define INST_FLDCW_INDEX 1054 +#define INST_FLDENV_INDEX 1060 +#define INST_FLDL2E_INDEX 1067 +#define INST_FLDL2T_INDEX 1074 +#define INST_FLDLG2_INDEX 1081 +#define INST_FLDLN2_INDEX 1088 +#define INST_FLDPI_INDEX 1095 +#define INST_FLDZ_INDEX 1101 +#define INST_FMUL_INDEX 1106 +#define INST_FMULP_INDEX 1111 +#define INST_FNCLEX_INDEX 1117 +#define INST_FNINIT_INDEX 1124 +#define INST_FNOP_INDEX 1131 +#define INST_FNSAVE_INDEX 1136 +#define INST_FNSTCW_INDEX 1143 +#define INST_FNSTENV_INDEX 1150 +#define INST_FNSTSW_INDEX 1158 +#define INST_FPATAN_INDEX 1165 +#define INST_FPREM_INDEX 1172 +#define INST_FPREM1_INDEX 1178 +#define INST_FPTAN_INDEX 1185 +#define INST_FRNDINT_INDEX 1191 +#define INST_FRSTOR_INDEX 1199 +#define INST_FSAVE_INDEX 1206 +#define INST_FSCALE_INDEX 1212 +#define INST_FSIN_INDEX 1219 +#define INST_FSINCOS_INDEX 1224 +#define INST_FSQRT_INDEX 1232 +#define INST_FST_INDEX 1238 +#define INST_FSTCW_INDEX 1242 +#define INST_FSTENV_INDEX 1248 +#define INST_FSTP_INDEX 1255 +#define INST_FSTSW_INDEX 1260 +#define INST_FSUB_INDEX 1266 +#define INST_FSUBP_INDEX 1271 +#define INST_FSUBR_INDEX 1277 +#define INST_FSUBRP_INDEX 1283 +#define INST_FTST_INDEX 1290 +#define INST_FUCOM_INDEX 1295 +#define INST_FUCOMI_INDEX 1301 +#define INST_FUCOMIP_INDEX 1308 +#define INST_FUCOMP_INDEX 1316 +#define INST_FUCOMPP_INDEX 1323 +#define INST_FWAIT_INDEX 1331 +#define INST_FXAM_INDEX 1337 +#define INST_FXCH_INDEX 1342 +#define INST_FXRSTOR_INDEX 1347 +#define INST_FXSAVE_INDEX 1355 +#define INST_FXTRACT_INDEX 1362 +#define INST_FYL2X_INDEX 1370 +#define INST_FYL2XP1_INDEX 1376 +#define INST_HADDPD_INDEX 1384 +#define INST_HADDPS_INDEX 1391 +#define INST_HSUBPD_INDEX 1398 +#define INST_HSUBPS_INDEX 
1405 +#define INST_IDIV_INDEX 1412 +#define INST_IMUL_INDEX 1417 +#define INST_INC_INDEX 1422 +#define INST_INT3_INDEX 1426 +#define INST_JA_INDEX 1431 +#define INST_JAE_INDEX 1434 +#define INST_JB_INDEX 1438 +#define INST_JBE_INDEX 1441 +#define INST_JC_INDEX 1445 +#define INST_JE_INDEX 1448 +#define INST_JG_INDEX 1451 +#define INST_JGE_INDEX 1454 +#define INST_JL_INDEX 1458 +#define INST_JLE_INDEX 1461 +#define INST_JNA_INDEX 1465 +#define INST_JNAE_INDEX 1469 +#define INST_JNB_INDEX 1474 +#define INST_JNBE_INDEX 1478 +#define INST_JNC_INDEX 1483 +#define INST_JNE_INDEX 1487 +#define INST_JNG_INDEX 1491 +#define INST_JNGE_INDEX 1495 +#define INST_JNL_INDEX 1500 +#define INST_JNLE_INDEX 1504 +#define INST_JNO_INDEX 1509 +#define INST_JNP_INDEX 1513 +#define INST_JNS_INDEX 1517 +#define INST_JNZ_INDEX 1521 +#define INST_JO_INDEX 1525 +#define INST_JP_INDEX 1528 +#define INST_JPE_INDEX 1531 +#define INST_JPO_INDEX 1535 +#define INST_JS_INDEX 1539 +#define INST_JZ_INDEX 1542 +#define INST_JMP_INDEX 1545 +#define INST_LDDQU_INDEX 1549 +#define INST_LDMXCSR_INDEX 1555 +#define INST_LAHF_INDEX 1563 +#define INST_LEA_INDEX 1568 +#define INST_LEAVE_INDEX 1572 +#define INST_LFENCE_INDEX 1578 +#define INST_MASKMOVDQU_INDEX 1585 +#define INST_MASKMOVQ_INDEX 1596 +#define INST_MAXPD_INDEX 1605 +#define INST_MAXPS_INDEX 1611 +#define INST_MAXSD_INDEX 1617 +#define INST_MAXSS_INDEX 1623 +#define INST_MFENCE_INDEX 1629 +#define INST_MINPD_INDEX 1636 +#define INST_MINPS_INDEX 1642 +#define INST_MINSD_INDEX 1648 +#define INST_MINSS_INDEX 1654 +#define INST_MONITOR_INDEX 1660 +#define INST_MOV_INDEX 1668 +#define INST_MOVAPD_INDEX 1672 +#define INST_MOVAPS_INDEX 1679 +#define INST_MOVBE_INDEX 1686 +#define INST_MOVD_INDEX 1692 +#define INST_MOVDDUP_INDEX 1697 +#define INST_MOVDQ2Q_INDEX 1705 +#define INST_MOVDQA_INDEX 1713 +#define INST_MOVDQU_INDEX 1720 +#define INST_MOVHLPS_INDEX 1727 +#define INST_MOVHPD_INDEX 1735 +#define INST_MOVHPS_INDEX 1742 +#define INST_MOVLHPS_INDEX 1749 
+#define INST_MOVLPD_INDEX 1757 +#define INST_MOVLPS_INDEX 1764 +#define INST_MOVMSKPD_INDEX 1771 +#define INST_MOVMSKPS_INDEX 1780 +#define INST_MOVNTDQ_INDEX 1789 +#define INST_MOVNTDQA_INDEX 1797 +#define INST_MOVNTI_INDEX 1806 +#define INST_MOVNTPD_INDEX 1813 +#define INST_MOVNTPS_INDEX 1821 +#define INST_MOVNTQ_INDEX 1829 +#define INST_MOVQ_INDEX 1836 +#define INST_MOVQ2DQ_INDEX 1841 +#define INST_MOVSD_INDEX 1849 +#define INST_MOVSHDUP_INDEX 1855 +#define INST_MOVSLDUP_INDEX 1864 +#define INST_MOVSS_INDEX 1873 +#define INST_MOVSX_INDEX 1879 +#define INST_MOVSXD_INDEX 1885 +#define INST_MOVUPD_INDEX 1892 +#define INST_MOVUPS_INDEX 1899 +#define INST_MOVZX_INDEX 1906 +#define INST_MOV_PTR_INDEX 1912 +#define INST_MPSADBW_INDEX 1920 +#define INST_MUL_INDEX 1928 +#define INST_MULPD_INDEX 1932 +#define INST_MULPS_INDEX 1938 +#define INST_MULSD_INDEX 1944 +#define INST_MULSS_INDEX 1950 +#define INST_MWAIT_INDEX 1956 +#define INST_NEG_INDEX 1962 +#define INST_NOP_INDEX 1966 +#define INST_NOT_INDEX 1970 +#define INST_OR_INDEX 1974 +#define INST_ORPD_INDEX 1977 +#define INST_ORPS_INDEX 1982 +#define INST_PABSB_INDEX 1987 +#define INST_PABSD_INDEX 1993 +#define INST_PABSW_INDEX 1999 +#define INST_PACKSSDW_INDEX 2005 +#define INST_PACKSSWB_INDEX 2014 +#define INST_PACKUSDW_INDEX 2023 +#define INST_PACKUSWB_INDEX 2032 +#define INST_PADDB_INDEX 2041 +#define INST_PADDD_INDEX 2047 +#define INST_PADDQ_INDEX 2053 +#define INST_PADDSB_INDEX 2059 +#define INST_PADDSW_INDEX 2066 +#define INST_PADDUSB_INDEX 2073 +#define INST_PADDUSW_INDEX 2081 +#define INST_PADDW_INDEX 2089 +#define INST_PALIGNR_INDEX 2095 +#define INST_PAND_INDEX 2103 +#define INST_PANDN_INDEX 2108 +#define INST_PAUSE_INDEX 2114 +#define INST_PAVGB_INDEX 2120 +#define INST_PAVGW_INDEX 2126 +#define INST_PBLENDVB_INDEX 2132 +#define INST_PBLENDW_INDEX 2141 +#define INST_PCMPEQB_INDEX 2149 +#define INST_PCMPEQD_INDEX 2157 +#define INST_PCMPEQQ_INDEX 2165 +#define INST_PCMPEQW_INDEX 2173 +#define 
INST_PCMPESTRI_INDEX 2181 +#define INST_PCMPESTRM_INDEX 2191 +#define INST_PCMPGTB_INDEX 2201 +#define INST_PCMPGTD_INDEX 2209 +#define INST_PCMPGTQ_INDEX 2217 +#define INST_PCMPGTW_INDEX 2225 +#define INST_PCMPISTRI_INDEX 2233 +#define INST_PCMPISTRM_INDEX 2243 +#define INST_PEXTRB_INDEX 2253 +#define INST_PEXTRD_INDEX 2260 +#define INST_PEXTRQ_INDEX 2267 +#define INST_PEXTRW_INDEX 2274 +#define INST_PF2ID_INDEX 2281 +#define INST_PF2IW_INDEX 2287 +#define INST_PFACC_INDEX 2293 +#define INST_PFADD_INDEX 2299 +#define INST_PFCMPEQ_INDEX 2305 +#define INST_PFCMPGE_INDEX 2313 +#define INST_PFCMPGT_INDEX 2321 +#define INST_PFMAX_INDEX 2329 +#define INST_PFMIN_INDEX 2335 +#define INST_PFMUL_INDEX 2341 +#define INST_PFNACC_INDEX 2347 +#define INST_PFPNACC_INDEX 2354 +#define INST_PFRCP_INDEX 2362 +#define INST_PFRCPIT1_INDEX 2368 +#define INST_PFRCPIT2_INDEX 2377 +#define INST_PFRSQIT1_INDEX 2386 +#define INST_PFRSQRT_INDEX 2395 +#define INST_PFSUB_INDEX 2403 +#define INST_PFSUBR_INDEX 2409 +#define INST_PHADDD_INDEX 2416 +#define INST_PHADDSW_INDEX 2423 +#define INST_PHADDW_INDEX 2431 +#define INST_PHMINPOSUW_INDEX 2438 +#define INST_PHSUBD_INDEX 2449 +#define INST_PHSUBSW_INDEX 2456 +#define INST_PHSUBW_INDEX 2464 +#define INST_PI2FD_INDEX 2471 +#define INST_PI2FW_INDEX 2477 +#define INST_PINSRB_INDEX 2483 +#define INST_PINSRD_INDEX 2490 +#define INST_PINSRQ_INDEX 2497 +#define INST_PINSRW_INDEX 2504 +#define INST_PMADDUBSW_INDEX 2511 +#define INST_PMADDWD_INDEX 2521 +#define INST_PMAXSB_INDEX 2529 +#define INST_PMAXSD_INDEX 2536 +#define INST_PMAXSW_INDEX 2543 +#define INST_PMAXUB_INDEX 2550 +#define INST_PMAXUD_INDEX 2557 +#define INST_PMAXUW_INDEX 2564 +#define INST_PMINSB_INDEX 2571 +#define INST_PMINSD_INDEX 2578 +#define INST_PMINSW_INDEX 2585 +#define INST_PMINUB_INDEX 2592 +#define INST_PMINUD_INDEX 2599 +#define INST_PMINUW_INDEX 2606 +#define INST_PMOVMSKB_INDEX 2613 +#define INST_PMOVSXBD_INDEX 2622 +#define INST_PMOVSXBQ_INDEX 2631 +#define 
INST_PMOVSXBW_INDEX 2640 +#define INST_PMOVSXDQ_INDEX 2649 +#define INST_PMOVSXWD_INDEX 2658 +#define INST_PMOVSXWQ_INDEX 2667 +#define INST_PMOVZXBD_INDEX 2676 +#define INST_PMOVZXBQ_INDEX 2685 +#define INST_PMOVZXBW_INDEX 2694 +#define INST_PMOVZXDQ_INDEX 2703 +#define INST_PMOVZXWD_INDEX 2712 +#define INST_PMOVZXWQ_INDEX 2721 +#define INST_PMULDQ_INDEX 2730 +#define INST_PMULHRSW_INDEX 2737 +#define INST_PMULHUW_INDEX 2746 +#define INST_PMULHW_INDEX 2754 +#define INST_PMULLD_INDEX 2761 +#define INST_PMULLW_INDEX 2768 +#define INST_PMULUDQ_INDEX 2775 +#define INST_POP_INDEX 2783 +#define INST_POPAD_INDEX 2787 +#define INST_POPCNT_INDEX 2793 +#define INST_POPFD_INDEX 2800 +#define INST_POPFQ_INDEX 2806 +#define INST_POR_INDEX 2812 +#define INST_PREFETCH_INDEX 2816 +#define INST_PSADBW_INDEX 2825 +#define INST_PSHUFB_INDEX 2832 +#define INST_PSHUFD_INDEX 2839 +#define INST_PSHUFW_INDEX 2846 +#define INST_PSHUFHW_INDEX 2853 +#define INST_PSHUFLW_INDEX 2861 +#define INST_PSIGNB_INDEX 2869 +#define INST_PSIGND_INDEX 2876 +#define INST_PSIGNW_INDEX 2883 +#define INST_PSLLD_INDEX 2890 +#define INST_PSLLDQ_INDEX 2896 +#define INST_PSLLQ_INDEX 2903 +#define INST_PSLLW_INDEX 2909 +#define INST_PSRAD_INDEX 2915 +#define INST_PSRAW_INDEX 2921 +#define INST_PSRLD_INDEX 2927 +#define INST_PSRLDQ_INDEX 2933 +#define INST_PSRLQ_INDEX 2940 +#define INST_PSRLW_INDEX 2946 +#define INST_PSUBB_INDEX 2952 +#define INST_PSUBD_INDEX 2958 +#define INST_PSUBQ_INDEX 2964 +#define INST_PSUBSB_INDEX 2970 +#define INST_PSUBSW_INDEX 2977 +#define INST_PSUBUSB_INDEX 2984 +#define INST_PSUBUSW_INDEX 2992 +#define INST_PSUBW_INDEX 3000 +#define INST_PSWAPD_INDEX 3006 +#define INST_PTEST_INDEX 3013 +#define INST_PUNPCKHBW_INDEX 3019 +#define INST_PUNPCKHDQ_INDEX 3029 +#define INST_PUNPCKHQDQ_INDEX 3039 +#define INST_PUNPCKHWD_INDEX 3050 +#define INST_PUNPCKLBW_INDEX 3060 +#define INST_PUNPCKLDQ_INDEX 3070 +#define INST_PUNPCKLQDQ_INDEX 3080 +#define INST_PUNPCKLWD_INDEX 3091 +#define 
INST_PUSH_INDEX 3101 +#define INST_PUSHAD_INDEX 3106 +#define INST_PUSHFD_INDEX 3113 +#define INST_PUSHFQ_INDEX 3120 +#define INST_PXOR_INDEX 3127 +#define INST_RCL_INDEX 3132 +#define INST_RCPPS_INDEX 3136 +#define INST_RCPSS_INDEX 3142 +#define INST_RCR_INDEX 3148 +#define INST_RDTSC_INDEX 3152 +#define INST_RDTSCP_INDEX 3158 +#define INST_REP_LODSB_INDEX 3165 +#define INST_REP_LODSD_INDEX 3175 +#define INST_REP_LODSQ_INDEX 3185 +#define INST_REP_LODSW_INDEX 3195 +#define INST_REP_MOVSB_INDEX 3205 +#define INST_REP_MOVSD_INDEX 3215 +#define INST_REP_MOVSQ_INDEX 3225 +#define INST_REP_MOVSW_INDEX 3235 +#define INST_REP_STOSB_INDEX 3245 +#define INST_REP_STOSD_INDEX 3255 +#define INST_REP_STOSQ_INDEX 3265 +#define INST_REP_STOSW_INDEX 3275 +#define INST_REPE_CMPSB_INDEX 3285 +#define INST_REPE_CMPSD_INDEX 3296 +#define INST_REPE_CMPSQ_INDEX 3307 +#define INST_REPE_CMPSW_INDEX 3318 +#define INST_REPE_SCASB_INDEX 3329 +#define INST_REPE_SCASD_INDEX 3340 +#define INST_REPE_SCASQ_INDEX 3351 +#define INST_REPE_SCASW_INDEX 3362 +#define INST_REPNE_CMPSB_INDEX 3373 +#define INST_REPNE_CMPSD_INDEX 3385 +#define INST_REPNE_CMPSQ_INDEX 3397 +#define INST_REPNE_CMPSW_INDEX 3409 +#define INST_REPNE_SCASB_INDEX 3421 +#define INST_REPNE_SCASD_INDEX 3433 +#define INST_REPNE_SCASQ_INDEX 3445 +#define INST_REPNE_SCASW_INDEX 3457 +#define INST_RET_INDEX 3469 +#define INST_ROL_INDEX 3473 +#define INST_ROR_INDEX 3477 +#define INST_ROUNDPD_INDEX 3481 +#define INST_ROUNDPS_INDEX 3489 +#define INST_ROUNDSD_INDEX 3497 +#define INST_ROUNDSS_INDEX 3505 +#define INST_RSQRTPS_INDEX 3513 +#define INST_RSQRTSS_INDEX 3521 +#define INST_SAHF_INDEX 3529 +#define INST_SAL_INDEX 3534 +#define INST_SAR_INDEX 3538 +#define INST_SBB_INDEX 3542 +#define INST_SETA_INDEX 3546 +#define INST_SETAE_INDEX 3551 +#define INST_SETB_INDEX 3557 +#define INST_SETBE_INDEX 3562 +#define INST_SETC_INDEX 3568 +#define INST_SETE_INDEX 3573 +#define INST_SETG_INDEX 3578 +#define INST_SETGE_INDEX 3583 +#define 
INST_SETL_INDEX 3589 +#define INST_SETLE_INDEX 3594 +#define INST_SETNA_INDEX 3600 +#define INST_SETNAE_INDEX 3606 +#define INST_SETNB_INDEX 3613 +#define INST_SETNBE_INDEX 3619 +#define INST_SETNC_INDEX 3626 +#define INST_SETNE_INDEX 3632 +#define INST_SETNG_INDEX 3638 +#define INST_SETNGE_INDEX 3644 +#define INST_SETNL_INDEX 3651 +#define INST_SETNLE_INDEX 3657 +#define INST_SETNO_INDEX 3664 +#define INST_SETNP_INDEX 3670 +#define INST_SETNS_INDEX 3676 +#define INST_SETNZ_INDEX 3682 +#define INST_SETO_INDEX 3688 +#define INST_SETP_INDEX 3693 +#define INST_SETPE_INDEX 3698 +#define INST_SETPO_INDEX 3704 +#define INST_SETS_INDEX 3710 +#define INST_SETZ_INDEX 3715 +#define INST_SFENCE_INDEX 3720 +#define INST_SHL_INDEX 3727 +#define INST_SHLD_INDEX 3731 +#define INST_SHR_INDEX 3736 +#define INST_SHRD_INDEX 3740 +#define INST_SHUFPD_INDEX 3745 +#define INST_SHUFPS_INDEX 3752 +#define INST_SQRTPD_INDEX 3759 +#define INST_SQRTPS_INDEX 3766 +#define INST_SQRTSD_INDEX 3773 +#define INST_SQRTSS_INDEX 3780 +#define INST_STC_INDEX 3787 +#define INST_STD_INDEX 3791 +#define INST_STMXCSR_INDEX 3795 +#define INST_SUB_INDEX 3803 +#define INST_SUBPD_INDEX 3807 +#define INST_SUBPS_INDEX 3813 +#define INST_SUBSD_INDEX 3819 +#define INST_SUBSS_INDEX 3825 +#define INST_TEST_INDEX 3831 +#define INST_UCOMISD_INDEX 3836 +#define INST_UCOMISS_INDEX 3844 +#define INST_UD2_INDEX 3852 +#define INST_UNPCKHPD_INDEX 3856 +#define INST_UNPCKHPS_INDEX 3865 +#define INST_UNPCKLPD_INDEX 3874 +#define INST_UNPCKLPS_INDEX 3883 +#define INST_XADD_INDEX 3892 +#define INST_XCHG_INDEX 3897 +#define INST_XOR_INDEX 3902 +#define INST_XORPD_INDEX 3906 +#define INST_XORPS_INDEX 3912 +// ${INSTRUCTION_DATA_END} + +// ============================================================================ +// [AsmJit::Instruction Description] +// ============================================================================ + +#define MAKE_INST(code, name, group, flags, oflags0, oflags1, opReg, opCode0, opCode1) \ + { 
code, code##_INDEX, group, flags, { oflags0, oflags1 }, opReg, { opCode0, opCode1 } } + +#define G(g) InstructionDescription::G_##g +#define F(f) InstructionDescription::F_##f +#define O(o) InstructionDescription::O_##o + +const InstructionDescription instructionDescription[] = +{ + // Instruction code (enum) | instruction name | instruction group| instruction flags| oflags[0] | oflags[1] | r| opCode[0] | opcode[1] + MAKE_INST(INST_ADC , "adc" , G(ALU) , F(LOCKABLE) , O(GQDWB_MEM) , O(GQDWB_MEM)|O(IMM) , 2, 0x00000010, 0x00000080), + MAKE_INST(INST_ADD , "add" , G(ALU) , F(LOCKABLE) , O(GQDWB_MEM) , O(GQDWB_MEM)|O(IMM) , 0, 0x00000000, 0x00000080), + MAKE_INST(INST_ADDPD , "addpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F58, 0), + MAKE_INST(INST_ADDPS , "addps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F58, 0), + MAKE_INST(INST_ADDSD , "addsd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000F58, 0), + MAKE_INST(INST_ADDSS , "addss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F58, 0), + MAKE_INST(INST_ADDSUBPD , "addsubpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000FD0, 0), + MAKE_INST(INST_ADDSUBPS , "addsubps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000FD0, 0), + MAKE_INST(INST_AMD_PREFETCH , "amd_prefetch" , G(M) , F(NONE) , O(MEM) , 0 , 0, 0x00000F0D, 0), + MAKE_INST(INST_AMD_PREFETCHW , "amd_prefetchw" , G(M) , F(NONE) , O(MEM) , 0 , 1, 0x00000F0D, 0), + MAKE_INST(INST_AND , "and" , G(ALU) , F(LOCKABLE) , O(GQDWB_MEM) , O(GQDWB_MEM)|O(IMM) , 4, 0x00000020, 0x00000080), + MAKE_INST(INST_ANDNPD , "andnpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F55, 0), + MAKE_INST(INST_ANDNPS , "andnps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F55, 0), + MAKE_INST(INST_ANDPD , "andpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F54, 0), + MAKE_INST(INST_ANDPS , "andps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F54, 0), + 
MAKE_INST(INST_BLENDPD , "blendpd" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A0D, 0), + MAKE_INST(INST_BLENDPS , "blendps" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A0C, 0), + MAKE_INST(INST_BLENDVPD , "blendvpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3815, 0), + MAKE_INST(INST_BLENDVPS , "blendvps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3814, 0), + MAKE_INST(INST_BSF , "bsf" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000FBC, 0), + MAKE_INST(INST_BSR , "bsr" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000FBD, 0), + MAKE_INST(INST_BSWAP , "bswap" , G(BSWAP) , F(NONE) , O(GQD) , 0 , 0, 0 , 0), + MAKE_INST(INST_BT , "bt" , G(BT) , F(NONE) , O(GQDW)|O(MEM) , O(GQDW)|O(IMM) , 4, 0x00000FA3, 0x00000FBA), + MAKE_INST(INST_BTC , "btc" , G(BT) , F(LOCKABLE) , O(GQDW)|O(MEM) , O(GQDW)|O(IMM) , 7, 0x00000FBB, 0x00000FBA), + MAKE_INST(INST_BTR , "btr" , G(BT) , F(LOCKABLE) , O(GQDW)|O(MEM) , O(GQDW)|O(IMM) , 6, 0x00000FB3, 0x00000FBA), + MAKE_INST(INST_BTS , "bts" , G(BT) , F(LOCKABLE) , O(GQDW)|O(MEM) , O(GQDW)|O(IMM) , 5, 0x00000FAB, 0x00000FBA), + MAKE_INST(INST_CALL , "call" , G(CALL) , F(JUMP) , O(GQD)|O(MEM) , 0 , 0, 0 , 0), + MAKE_INST(INST_CBW , "cbw" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x66000099, 0), + MAKE_INST(INST_CDQE , "cdqe" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x48000099, 0), + MAKE_INST(INST_CLC , "clc" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0x000000F8, 0), + MAKE_INST(INST_CLD , "cld" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0x000000FC, 0), + MAKE_INST(INST_CLFLUSH , "clflush" , G(M) , F(NONE) , O(MEM) , 0 , 7, 0x00000FAE, 0), + MAKE_INST(INST_CMC , "cmc" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0x000000F5, 0), + MAKE_INST(INST_CMOVA , "cmova" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F47, 0), + MAKE_INST(INST_CMOVAE , "cmovae" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F43, 0), + MAKE_INST(INST_CMOVB , "cmovb" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 
0x00000F42, 0), + MAKE_INST(INST_CMOVBE , "cmovbe" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F46, 0), + MAKE_INST(INST_CMOVC , "cmovc" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F42, 0), + MAKE_INST(INST_CMOVE , "cmove" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F44, 0), + MAKE_INST(INST_CMOVG , "cmovg" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F4F, 0), + MAKE_INST(INST_CMOVGE , "cmovge" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F4D, 0), + MAKE_INST(INST_CMOVL , "cmovl" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F4C, 0), + MAKE_INST(INST_CMOVLE , "cmovle" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F4E, 0), + MAKE_INST(INST_CMOVNA , "cmovna" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F46, 0), + MAKE_INST(INST_CMOVNAE , "cmovnae" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F42, 0), + MAKE_INST(INST_CMOVNB , "cmovnb" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F43, 0), + MAKE_INST(INST_CMOVNBE , "cmovnbe" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F47, 0), + MAKE_INST(INST_CMOVNC , "cmovnc" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F43, 0), + MAKE_INST(INST_CMOVNE , "cmovne" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F45, 0), + MAKE_INST(INST_CMOVNG , "cmovng" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F4E, 0), + MAKE_INST(INST_CMOVNGE , "cmovnge" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F4C, 0), + MAKE_INST(INST_CMOVNL , "cmovnl" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F4D, 0), + MAKE_INST(INST_CMOVNLE , "cmovnle" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F4F, 0), + MAKE_INST(INST_CMOVNO , "cmovno" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F41, 0), + MAKE_INST(INST_CMOVNP , "cmovnp" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F4B, 0), + MAKE_INST(INST_CMOVNS , "cmovns" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 
0x00000F49, 0), + MAKE_INST(INST_CMOVNZ , "cmovnz" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F45, 0), + MAKE_INST(INST_CMOVO , "cmovo" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F40, 0), + MAKE_INST(INST_CMOVP , "cmovp" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F4A, 0), + MAKE_INST(INST_CMOVPE , "cmovpe" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F4A, 0), + MAKE_INST(INST_CMOVPO , "cmovpo" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F4B, 0), + MAKE_INST(INST_CMOVS , "cmovs" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F48, 0), + MAKE_INST(INST_CMOVZ , "cmovz" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0x00000F44, 0), + MAKE_INST(INST_CMP , "cmp" , G(ALU) , F(NONE) , O(GQDWB_MEM) , O(GQDWB_MEM)|O(IMM) , 7, 0x00000038, 0x00000080), + MAKE_INST(INST_CMPPD , "cmppd" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000FC2, 0), + MAKE_INST(INST_CMPPS , "cmpps" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000FC2, 0), + MAKE_INST(INST_CMPSD , "cmpsd" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000FC2, 0), + MAKE_INST(INST_CMPSS , "cmpss" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF3000FC2, 0), + MAKE_INST(INST_CMPXCHG , "cmpxchg" , G(RM_R) , F(SPECIAL)|F(LOCKABLE), 0 , 0 , 0, 0x00000FB0, 0), + MAKE_INST(INST_CMPXCHG16B , "cmpxchg16b" , G(M) , F(SPECIAL) , O(MEM) , 0 , 1, 0x00000FC7, 1 /* RexW */), + MAKE_INST(INST_CMPXCHG8B , "cmpxchg8b" , G(M) , F(SPECIAL) , O(MEM) , 0 , 1, 0x00000FC7, 0), + MAKE_INST(INST_COMISD , "comisd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F2F, 0), + MAKE_INST(INST_COMISS , "comiss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F2F, 0), + MAKE_INST(INST_CPUID , "cpuid" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x00000FA2, 0), + MAKE_INST(INST_CRC32 , "crc32" , G(CRC32) , F(NONE) , O(GQD) , O(GQDWB_MEM) , 0, 0xF20F38F0, 0), + MAKE_INST(INST_CVTDQ2PD , "cvtdq2pd" , G(MMU_RMI) , F(NONE) , O(XMM) , 
O(XMM_MEM) , 0, 0xF3000FE6, 0), + MAKE_INST(INST_CVTDQ2PS , "cvtdq2ps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F5B, 0), + MAKE_INST(INST_CVTPD2DQ , "cvtpd2dq" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000FE6, 0), + MAKE_INST(INST_CVTPD2PI , "cvtpd2pi" , G(MMU_RMI) , F(NONE) , O(MM) , O(XMM_MEM) , 0, 0x66000F2D, 0), + MAKE_INST(INST_CVTPD2PS , "cvtpd2ps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F5A, 0), + MAKE_INST(INST_CVTPI2PD , "cvtpi2pd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(MM_MEM) , 0, 0x66000F2A, 0), + MAKE_INST(INST_CVTPI2PS , "cvtpi2ps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(MM_MEM) , 0, 0x00000F2A, 0), + MAKE_INST(INST_CVTPS2DQ , "cvtps2dq" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F5B, 0), + MAKE_INST(INST_CVTPS2PD , "cvtps2pd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F5A, 0), + MAKE_INST(INST_CVTPS2PI , "cvtps2pi" , G(MMU_RMI) , F(NONE) , O(MM) , O(XMM_MEM) , 0, 0x00000F2D, 0), + MAKE_INST(INST_CVTSD2SI , "cvtsd2si" , G(MMU_RMI) , F(NONE) , O(GQD) , O(XMM_MEM) , 0, 0xF2000F2D, 0), + MAKE_INST(INST_CVTSD2SS , "cvtsd2ss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000F5A, 0), + MAKE_INST(INST_CVTSI2SD , "cvtsi2sd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(GQD)|O(MEM) , 0, 0xF2000F2A, 0), + MAKE_INST(INST_CVTSI2SS , "cvtsi2ss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(GQD)|O(MEM) , 0, 0xF3000F2A, 0), + MAKE_INST(INST_CVTSS2SD , "cvtss2sd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F5A, 0), + MAKE_INST(INST_CVTSS2SI , "cvtss2si" , G(MMU_RMI) , F(NONE) , O(GQD) , O(XMM_MEM) , 0, 0xF3000F2D, 0), + MAKE_INST(INST_CVTTPD2DQ , "cvttpd2dq" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000FE6, 0), + MAKE_INST(INST_CVTTPD2PI , "cvttpd2pi" , G(MMU_RMI) , F(NONE) , O(MM) , O(XMM_MEM) , 0, 0x66000F2C, 0), + MAKE_INST(INST_CVTTPS2DQ , "cvttps2dq" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F5B, 0), + MAKE_INST(INST_CVTTPS2PI , "cvttps2pi" , G(MMU_RMI) , F(NONE) , 
O(MM) , O(XMM_MEM) , 0, 0x00000F2C, 0), + MAKE_INST(INST_CVTTSD2SI , "cvttsd2si" , G(MMU_RMI) , F(NONE) , O(GQD) , O(XMM_MEM) , 0, 0xF2000F2C, 0), + MAKE_INST(INST_CVTTSS2SI , "cvttss2si" , G(MMU_RMI) , F(NONE) , O(GQD) , O(XMM_MEM) , 0, 0xF3000F2C, 0), + MAKE_INST(INST_CWDE , "cwde" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x00000099, 0), + MAKE_INST(INST_DAA , "daa" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x00000027, 0), + MAKE_INST(INST_DAS , "das" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x0000002F, 0), + MAKE_INST(INST_DEC , "dec" , G(INC_DEC) , F(LOCKABLE) , O(GQDWB_MEM) , 0 , 1, 0x00000048, 0x000000FE), + MAKE_INST(INST_DIV , "div" , G(RM) , F(SPECIAL) , 0 , 0 , 6, 0x000000F6, 0), + MAKE_INST(INST_DIVPD , "divpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F5E, 0), + MAKE_INST(INST_DIVPS , "divps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F5E, 0), + MAKE_INST(INST_DIVSD , "divsd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000F5E, 0), + MAKE_INST(INST_DIVSS , "divss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F5E, 0), + MAKE_INST(INST_DPPD , "dppd" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A41, 0), + MAKE_INST(INST_DPPS , "dpps" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A40, 0), + MAKE_INST(INST_EMMS , "emms" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0x00000F77, 0), + MAKE_INST(INST_ENTER , "enter" , G(ENTER) , F(SPECIAL) , 0 , 0 , 0, 0x000000C8, 0), + MAKE_INST(INST_EXTRACTPS , "extractps" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A17, 0), + MAKE_INST(INST_F2XM1 , "f2xm1" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9F0, 0), + MAKE_INST(INST_FABS , "fabs" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9E1, 0), + MAKE_INST(INST_FADD , "fadd" , G(X87_FPU) , F(FPU) , 0 , 0 , 0, 0xD8DCC0C0, 0), + MAKE_INST(INST_FADDP , "faddp" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DEC0, 0), + MAKE_INST(INST_FBLD , "fbld" , G(M) , F(FPU) , O(MEM) , 0 , 4, 0x000000DF, 0), + MAKE_INST(INST_FBSTP , "fbstp" , 
G(M) , F(FPU) , O(MEM) , 0 , 6, 0x000000DF, 0), + MAKE_INST(INST_FCHS , "fchs" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9E0, 0), + MAKE_INST(INST_FCLEX , "fclex" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x9B00DBE2, 0), + MAKE_INST(INST_FCMOVB , "fcmovb" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DAC0, 0), + MAKE_INST(INST_FCMOVBE , "fcmovbe" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DAD0, 0), + MAKE_INST(INST_FCMOVE , "fcmove" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DAC8, 0), + MAKE_INST(INST_FCMOVNB , "fcmovnb" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DBC0, 0), + MAKE_INST(INST_FCMOVNBE , "fcmovnbe" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DBD0, 0), + MAKE_INST(INST_FCMOVNE , "fcmovne" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DBC8, 0), + MAKE_INST(INST_FCMOVNU , "fcmovnu" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DBD8, 0), + MAKE_INST(INST_FCMOVU , "fcmovu" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DAD8, 0), + MAKE_INST(INST_FCOM , "fcom" , G(X87_FPU) , F(FPU) , 0 , 0 , 2, 0xD8DCD0D0, 0), + MAKE_INST(INST_FCOMI , "fcomi" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DBF0, 0), + MAKE_INST(INST_FCOMIP , "fcomip" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DFF0, 0), + MAKE_INST(INST_FCOMP , "fcomp" , G(X87_FPU) , F(FPU) , 0 , 0 , 3, 0xD8DCD8D8, 0), + MAKE_INST(INST_FCOMPP , "fcompp" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000DED9, 0), + MAKE_INST(INST_FCOS , "fcos" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9FF, 0), + MAKE_INST(INST_FDECSTP , "fdecstp" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9F6, 0), + MAKE_INST(INST_FDIV , "fdiv" , G(X87_FPU) , F(FPU) , 0 , 0 , 6, 0xD8DCF0F8, 0), + MAKE_INST(INST_FDIVP , "fdivp" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DEF8, 0), + MAKE_INST(INST_FDIVR , "fdivr" , G(X87_FPU) , F(FPU) , 0 , 0 , 7, 0xD8DCF8F0, 0), + MAKE_INST(INST_FDIVRP , "fdivrp" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DEF0, 0), + MAKE_INST(INST_FEMMS , "femms" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x00000F0E, 0), + MAKE_INST(INST_FFREE , "ffree" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DDC0, 0), + 
MAKE_INST(INST_FIADD , "fiadd" , G(X87_MEM) , F(FPU) , O(FM_2_4) , 0 , 0, 0xDEDA0000, 0), + MAKE_INST(INST_FICOM , "ficom" , G(X87_MEM) , F(FPU) , O(FM_2_4) , 0 , 2, 0xDEDA0000, 0), + MAKE_INST(INST_FICOMP , "ficomp" , G(X87_MEM) , F(FPU) , O(FM_2_4) , 0 , 3, 0xDEDA0000, 0), + MAKE_INST(INST_FIDIV , "fidiv" , G(X87_MEM) , F(FPU) , O(FM_2_4) , 0 , 6, 0xDEDA0000, 0), + MAKE_INST(INST_FIDIVR , "fidivr" , G(X87_MEM) , F(FPU) , O(FM_2_4) , 0 , 7, 0xDEDA0000, 0), + MAKE_INST(INST_FILD , "fild" , G(X87_MEM) , F(FPU) , O(FM_2_4_8) , 0 , 0, 0xDFDBDF05, 0), + MAKE_INST(INST_FIMUL , "fimul" , G(X87_MEM) , F(FPU) , O(FM_2_4) , 0 , 1, 0xDEDA0000, 0), + MAKE_INST(INST_FINCSTP , "fincstp" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9F7, 0), + MAKE_INST(INST_FINIT , "finit" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x9B00DBE3, 0), + MAKE_INST(INST_FIST , "fist" , G(X87_MEM) , F(FPU) , O(FM_2_4) , 0 , 2, 0xDFDB0000, 0), + MAKE_INST(INST_FISTP , "fistp" , G(X87_MEM) , F(FPU) , O(FM_2_4_8) , 0 , 3, 0xDFDBDF07, 0), + MAKE_INST(INST_FISTTP , "fisttp" , G(X87_MEM) , F(FPU) , O(FM_2_4_8) , 0 , 1, 0xDFDBDD01, 0), + MAKE_INST(INST_FISUB , "fisub" , G(X87_MEM) , F(FPU) , O(FM_2_4) , 0 , 4, 0xDEDA0000, 0), + MAKE_INST(INST_FISUBR , "fisubr" , G(X87_MEM) , F(FPU) , O(FM_2_4) , 0 , 5, 0xDEDA0000, 0), + MAKE_INST(INST_FLD , "fld" , G(X87_MEM_STI) , F(FPU) , O(FM_4_8_10) , 0 , 0, 0x00D9DD00, 0xD9C0DB05), + MAKE_INST(INST_FLD1 , "fld1" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9E8, 0), + MAKE_INST(INST_FLDCW , "fldcw" , G(M) , F(FPU) , O(MEM) , 0 , 5, 0x000000D9, 0), + MAKE_INST(INST_FLDENV , "fldenv" , G(M) , F(FPU) , O(MEM) , 0 , 4, 0x000000D9, 0), + MAKE_INST(INST_FLDL2E , "fldl2e" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9EA, 0), + MAKE_INST(INST_FLDL2T , "fldl2t" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9E9, 0), + MAKE_INST(INST_FLDLG2 , "fldlg2" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9EC, 0), + MAKE_INST(INST_FLDLN2 , "fldln2" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9ED, 0), + MAKE_INST(INST_FLDPI , "fldpi" , 
G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9EB, 0), + MAKE_INST(INST_FLDZ , "fldz" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9EE, 0), + MAKE_INST(INST_FMUL , "fmul" , G(X87_FPU) , F(FPU) , 0 , 0 , 1, 0xD8DCC8C8, 0), + MAKE_INST(INST_FMULP , "fmulp" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DEC8, 0), + MAKE_INST(INST_FNCLEX , "fnclex" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000DBE2, 0), + MAKE_INST(INST_FNINIT , "fninit" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000DBE3, 0), + MAKE_INST(INST_FNOP , "fnop" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9D0, 0), + MAKE_INST(INST_FNSAVE , "fnsave" , G(M) , F(FPU) , O(MEM) , 0 , 6, 0x000000DD, 0), + MAKE_INST(INST_FNSTCW , "fnstcw" , G(M) , F(FPU) , O(MEM) , 0 , 7, 0x000000D9, 0), + MAKE_INST(INST_FNSTENV , "fnstenv" , G(M) , F(FPU) , O(MEM) , 0 , 6, 0x000000D9, 0), + MAKE_INST(INST_FNSTSW , "fnstsw" , G(X87_FSTSW) , F(FPU) , O(MEM) , 0 , 7, 0x000000DD, 0x0000DFE0), + MAKE_INST(INST_FPATAN , "fpatan" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9F3, 0), + MAKE_INST(INST_FPREM , "fprem" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9F8, 0), + MAKE_INST(INST_FPREM1 , "fprem1" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9F5, 0), + MAKE_INST(INST_FPTAN , "fptan" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9F2, 0), + MAKE_INST(INST_FRNDINT , "frndint" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9FC, 0), + MAKE_INST(INST_FRSTOR , "frstor" , G(M) , F(FPU) , O(MEM) , 0 , 4, 0x000000DD, 0), + MAKE_INST(INST_FSAVE , "fsave" , G(M) , F(FPU) , O(MEM) , 0 , 6, 0x9B0000DD, 0), + MAKE_INST(INST_FSCALE , "fscale" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9FD, 0), + MAKE_INST(INST_FSIN , "fsin" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9FE, 0), + MAKE_INST(INST_FSINCOS , "fsincos" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9FB, 0), + MAKE_INST(INST_FSQRT , "fsqrt" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9FA, 0), + MAKE_INST(INST_FST , "fst" , G(X87_MEM_STI) , F(FPU) , O(FM_4_8) , 0 , 2, 0x00D9DD02, 0xDDD00000), + MAKE_INST(INST_FSTCW , "fstcw" , G(M) , F(FPU) , O(MEM) , 0 , 7, 0x9B0000D9, 0), + 
MAKE_INST(INST_FSTENV , "fstenv" , G(M) , F(FPU) , O(MEM) , 0 , 6, 0x9B0000D9, 0), + MAKE_INST(INST_FSTP , "fstp" , G(X87_MEM_STI) , F(FPU) , O(FM_4_8_10) , 0 , 3, 0x00D9DD03, 0xDDD8DB07), + MAKE_INST(INST_FSTSW , "fstsw" , G(X87_FSTSW) , F(FPU) , O(MEM) , 0 , 7, 0x9B0000DD, 0x9B00DFE0), + MAKE_INST(INST_FSUB , "fsub" , G(X87_FPU) , F(FPU) , 0 , 0 , 4, 0xD8DCE0E8, 0), + MAKE_INST(INST_FSUBP , "fsubp" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DEE8, 0), + MAKE_INST(INST_FSUBR , "fsubr" , G(X87_FPU) , F(FPU) , 0 , 0 , 5, 0xD8DCE8E0, 0), + MAKE_INST(INST_FSUBRP , "fsubrp" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DEE0, 0), + MAKE_INST(INST_FTST , "ftst" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9E4, 0), + MAKE_INST(INST_FUCOM , "fucom" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DDE0, 0), + MAKE_INST(INST_FUCOMI , "fucomi" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DBE8, 0), + MAKE_INST(INST_FUCOMIP , "fucomip" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DFE8, 0), + MAKE_INST(INST_FUCOMP , "fucomp" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000DDE8, 0), + MAKE_INST(INST_FUCOMPP , "fucompp" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000DAE9, 0), + MAKE_INST(INST_FWAIT , "fwait" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x000000DB, 0), + MAKE_INST(INST_FXAM , "fxam" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9E5, 0), + MAKE_INST(INST_FXCH , "fxch" , G(X87_STI) , F(FPU) , 0 , 0 , 0, 0x0000D9C8, 0), + MAKE_INST(INST_FXRSTOR , "fxrstor" , G(M) , F(FPU) , 0 , 0 , 1, 0x00000FAE, 0), + MAKE_INST(INST_FXSAVE , "fxsave" , G(M) , F(FPU) , 0 , 0 , 0, 0x00000FAE, 0), + MAKE_INST(INST_FXTRACT , "fxtract" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9F4, 0), + MAKE_INST(INST_FYL2X , "fyl2x" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9F1, 0), + MAKE_INST(INST_FYL2XP1 , "fyl2xp1" , G(EMIT) , F(FPU) , 0 , 0 , 0, 0x0000D9F9, 0), + MAKE_INST(INST_HADDPD , "haddpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F7C, 0), + MAKE_INST(INST_HADDPS , "haddps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000F7C, 0), + 
MAKE_INST(INST_HSUBPD , "hsubpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F7D, 0), + MAKE_INST(INST_HSUBPS , "hsubps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000F7D, 0), + MAKE_INST(INST_IDIV , "idiv" , G(RM) , F(SPECIAL) , 0 , 0 , 7, 0x000000F6, 0), + MAKE_INST(INST_IMUL , "imul" , G(IMUL) , F(SPECIAL) , 0 , 0 , 0, 0 , 0), + MAKE_INST(INST_INC , "inc" , G(INC_DEC) , F(LOCKABLE) , O(GQDWB_MEM) , 0 , 0, 0x00000040, 0x000000FE), + MAKE_INST(INST_INT3 , "int3" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0x000000CC, 0), + MAKE_INST(INST_JA , "ja" , G(J) , F(JUMP) , 0 , 0 , 0, 0x7 , 0), + MAKE_INST(INST_JAE , "jae" , G(J) , F(JUMP) , 0 , 0 , 0, 0x3 , 0), + MAKE_INST(INST_JB , "jb" , G(J) , F(JUMP) , 0 , 0 , 0, 0x2 , 0), + MAKE_INST(INST_JBE , "jbe" , G(J) , F(JUMP) , 0 , 0 , 0, 0x6 , 0), + MAKE_INST(INST_JC , "jc" , G(J) , F(JUMP) , 0 , 0 , 0, 0x2 , 0), + MAKE_INST(INST_JE , "je" , G(J) , F(JUMP) , 0 , 0 , 0, 0x4 , 0), + MAKE_INST(INST_JG , "jg" , G(J) , F(JUMP) , 0 , 0 , 0, 0xF , 0), + MAKE_INST(INST_JGE , "jge" , G(J) , F(JUMP) , 0 , 0 , 0, 0xD , 0), + MAKE_INST(INST_JL , "jl" , G(J) , F(JUMP) , 0 , 0 , 0, 0xC , 0), + MAKE_INST(INST_JLE , "jle" , G(J) , F(JUMP) , 0 , 0 , 0, 0xE , 0), + MAKE_INST(INST_JNA , "jna" , G(J) , F(JUMP) , 0 , 0 , 0, 0x6 , 0), + MAKE_INST(INST_JNAE , "jnae" , G(J) , F(JUMP) , 0 , 0 , 0, 0x2 , 0), + MAKE_INST(INST_JNB , "jnb" , G(J) , F(JUMP) , 0 , 0 , 0, 0x3 , 0), + MAKE_INST(INST_JNBE , "jnbe" , G(J) , F(JUMP) , 0 , 0 , 0, 0x7 , 0), + MAKE_INST(INST_JNC , "jnc" , G(J) , F(JUMP) , 0 , 0 , 0, 0x3 , 0), + MAKE_INST(INST_JNE , "jne" , G(J) , F(JUMP) , 0 , 0 , 0, 0x5 , 0), + MAKE_INST(INST_JNG , "jng" , G(J) , F(JUMP) , 0 , 0 , 0, 0xE , 0), + MAKE_INST(INST_JNGE , "jnge" , G(J) , F(JUMP) , 0 , 0 , 0, 0xC , 0), + MAKE_INST(INST_JNL , "jnl" , G(J) , F(JUMP) , 0 , 0 , 0, 0xD , 0), + MAKE_INST(INST_JNLE , "jnle" , G(J) , F(JUMP) , 0 , 0 , 0, 0xF , 0), + MAKE_INST(INST_JNO , "jno" , G(J) , F(JUMP) , 0 , 0 , 0, 0x1 , 0), + 
MAKE_INST(INST_JNP , "jnp" , G(J) , F(JUMP) , 0 , 0 , 0, 0xB , 0), + MAKE_INST(INST_JNS , "jns" , G(J) , F(JUMP) , 0 , 0 , 0, 0x9 , 0), + MAKE_INST(INST_JNZ , "jnz" , G(J) , F(JUMP) , 0 , 0 , 0, 0x5 , 0), + MAKE_INST(INST_JO , "jo" , G(J) , F(JUMP) , 0 , 0 , 0, 0x0 , 0), + MAKE_INST(INST_JP , "jp" , G(J) , F(JUMP) , 0 , 0 , 0, 0xA , 0), + MAKE_INST(INST_JPE , "jpe" , G(J) , F(JUMP) , 0 , 0 , 0, 0xA , 0), + MAKE_INST(INST_JPO , "jpo" , G(J) , F(JUMP) , 0 , 0 , 0, 0xB , 0), + MAKE_INST(INST_JS , "js" , G(J) , F(JUMP) , 0 , 0 , 0, 0x8 , 0), + MAKE_INST(INST_JZ , "jz" , G(J) , F(JUMP) , 0 , 0 , 0, 0x4 , 0), + MAKE_INST(INST_JMP , "jmp" , G(JMP) , F(JUMP) , 0 , 0 , 0, 0 , 0), + MAKE_INST(INST_LDDQU , "lddqu" , G(MMU_RMI) , F(NONE) , O(XMM) , O(MEM) , 0, 0xF2000FF0, 0), + MAKE_INST(INST_LDMXCSR , "ldmxcsr" , G(M) , F(NONE) , O(MEM) , 0 , 2, 0x00000FAE, 0), + MAKE_INST(INST_LAHF , "lahf" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x0000009F, 0), + MAKE_INST(INST_LEA , "lea" , G(LEA) , F(NONE) , O(GQD) , O(MEM) , 0, 0 , 0), + MAKE_INST(INST_LEAVE , "leave" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x000000C9, 0), + MAKE_INST(INST_LFENCE , "lfence" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0x000FAEE8, 0), + MAKE_INST(INST_MASKMOVDQU , "maskmovdqu" , G(MMU_RMI) , F(SPECIAL) , O(XMM) , O(XMM) , 0, 0x66000FF7, 0), + MAKE_INST(INST_MASKMOVQ , "maskmovq" , G(MMU_RMI) , F(SPECIAL) , O(MM) , O(MM) , 0, 0x00000FF7, 0), + MAKE_INST(INST_MAXPD , "maxpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F5F, 0), + MAKE_INST(INST_MAXPS , "maxps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F5F, 0), + MAKE_INST(INST_MAXSD , "maxsd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000F5F, 0), + MAKE_INST(INST_MAXSS , "maxss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F5F, 0), + MAKE_INST(INST_MFENCE , "mfence" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0x000FAEF0, 0), + MAKE_INST(INST_MINPD , "minpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F5D, 0), + 
MAKE_INST(INST_MINPS , "minps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F5D, 0), + MAKE_INST(INST_MINSD , "minsd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000F5D, 0), + MAKE_INST(INST_MINSS , "minss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F5D, 0), + MAKE_INST(INST_MONITOR , "monitor" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x000F01C8, 0), + MAKE_INST(INST_MOV , "mov" , G(MOV) , F(MOV) , O(GQDWB_MEM) , O(GQDWB_MEM)|O(IMM) , 0, 0 , 0), + MAKE_INST(INST_MOVAPD , "movapd" , G(MMU_MOV) , F(MOV) , O(XMM_MEM) , O(XMM_MEM) , 0, 0x66000F28, 0x66000F29), + MAKE_INST(INST_MOVAPS , "movaps" , G(MMU_MOV) , F(MOV) , O(XMM_MEM) , O(XMM_MEM) , 0, 0x00000F28, 0x00000F29), + MAKE_INST(INST_MOVBE , "movbe" , G(MOVBE) , F(MOV) , O(GQDW)|O(MEM) , O(GQDW)|O(MEM) , 0, 0x000F38F0, 0x000F38F1), + MAKE_INST(INST_MOVD , "movd" , G(MMU_MOVD) , F(MOV) , O(GD)|O(MM_XMM_MEM) , O(GD)|O(MM_XMM_MEM) , 0, 0 , 0), + MAKE_INST(INST_MOVDDUP , "movddup" , G(MMU_MOV) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0xF2000F12, 0), + MAKE_INST(INST_MOVDQ2Q , "movdq2q" , G(MMU_MOV) , F(MOV) , O(MM) , O(XMM) , 0, 0xF2000FD6, 0), + MAKE_INST(INST_MOVDQA , "movdqa" , G(MMU_MOV) , F(MOV) , O(XMM_MEM) , O(XMM_MEM) , 0, 0x66000F6F, 0x66000F7F), + MAKE_INST(INST_MOVDQU , "movdqu" , G(MMU_MOV) , F(MOV) , O(XMM_MEM) , O(XMM_MEM) , 0, 0xF3000F6F, 0xF3000F7F), + MAKE_INST(INST_MOVHLPS , "movhlps" , G(MMU_MOV) , F(NONE) , O(XMM) , O(XMM) , 0, 0x00000F12, 0), + MAKE_INST(INST_MOVHPD , "movhpd" , G(MMU_MOV) , F(NONE) , O(XMM_MEM) , O(XMM_MEM) , 0, 0x66000F16, 0x66000F17), + MAKE_INST(INST_MOVHPS , "movhps" , G(MMU_MOV) , F(NONE) , O(XMM_MEM) , O(XMM_MEM) , 0, 0x00000F16, 0x00000F17), + MAKE_INST(INST_MOVLHPS , "movlhps" , G(MMU_MOV) , F(NONE) , O(XMM) , O(XMM) , 0, 0x00000F16, 0), + MAKE_INST(INST_MOVLPD , "movlpd" , G(MMU_MOV) , F(NONE) , O(XMM_MEM) , O(XMM_MEM) , 0, 0x66000F12, 0x66000F13), + MAKE_INST(INST_MOVLPS , "movlps" , G(MMU_MOV) , F(NONE) , O(XMM_MEM) , O(XMM_MEM) , 0, 
0x00000F12, 0x00000F13), + MAKE_INST(INST_MOVMSKPD , "movmskpd" , G(MMU_MOV) , F(MOV) , O(GQD)|O(NOREX) , O(XMM) , 0, 0x66000F50, 0), + MAKE_INST(INST_MOVMSKPS , "movmskps" , G(MMU_MOV) , F(MOV) , O(GQD)|O(NOREX) , O(XMM) , 0, 0x00000F50, 0), + MAKE_INST(INST_MOVNTDQ , "movntdq" , G(MMU_MOV) , F(NONE) , O(MEM) , O(XMM) , 0, 0 , 0x66000FE7), + MAKE_INST(INST_MOVNTDQA , "movntdqa" , G(MMU_MOV) , F(MOV) , O(XMM) , O(MEM) , 0, 0x660F382A, 0), + MAKE_INST(INST_MOVNTI , "movnti" , G(MMU_MOV) , F(MOV) , O(MEM) , O(GQD) , 0, 0 , 0x00000FC3), + MAKE_INST(INST_MOVNTPD , "movntpd" , G(MMU_MOV) , F(NONE) , O(MEM) , O(XMM) , 0, 0 , 0x66000F2B), + MAKE_INST(INST_MOVNTPS , "movntps" , G(MMU_MOV) , F(NONE) , O(MEM) , O(XMM) , 0, 0 , 0x00000F2B), + MAKE_INST(INST_MOVNTQ , "movntq" , G(MMU_MOV) , F(NONE) , O(MEM) , O(MM) , 0, 0 , 0x00000FE7), + MAKE_INST(INST_MOVQ , "movq" , G(MMU_MOVQ) , F(MOV) , O(GQ)|O(MM_XMM_MEM) , O(GQ)|O(MM_XMM_MEM) , 0, 0 , 0), + MAKE_INST(INST_MOVQ2DQ , "movq2dq" , G(MMU_RMI) , F(MOV) , O(XMM) , O(MM) , 0, 0xF3000FD6, 0), + MAKE_INST(INST_MOVSD , "movsd" , G(MMU_MOV) , F(NONE) , O(XMM_MEM) , O(XMM_MEM) , 0, 0xF2000F10, 0xF2000F11), + MAKE_INST(INST_MOVSHDUP , "movshdup" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F16, 0), + MAKE_INST(INST_MOVSLDUP , "movsldup" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F12, 0), + MAKE_INST(INST_MOVSS , "movss" , G(MMU_MOV) , F(NONE) , O(XMM_MEM) , O(XMM_MEM) , 0, 0xF3000F10, 0xF3000F11), + MAKE_INST(INST_MOVSX , "movsx" , G(MOVSX_MOVZX) , F(NONE) , O(GQDW) , O(GWB_MEM) , 0, 0x00000FBE, 0), + MAKE_INST(INST_MOVSXD , "movsxd" , G(MOVSXD) , F(NONE) , O(GQ) , O(GD_MEM) , 0, 0 , 0), + MAKE_INST(INST_MOVUPD , "movupd" , G(MMU_MOV) , F(MOV) , O(XMM_MEM) , O(XMM_MEM) , 0, 0x66000F10, 0x66000F11), + MAKE_INST(INST_MOVUPS , "movups" , G(MMU_MOV) , F(MOV) , O(XMM_MEM) , O(XMM_MEM) , 0, 0x00000F10, 0x00000F11), + MAKE_INST(INST_MOVZX , "movzx" , G(MOVSX_MOVZX) , F(MOV) , O(GQDW) , O(GWB_MEM) , 0, 0x00000FB6, 0), 
+ MAKE_INST(INST_MOV_PTR , "mov_ptr" , G(MOV_PTR) , F(MOV)|F(SPECIAL), O(GQDWB) , O(IMM) , 0, 0 , 0), + MAKE_INST(INST_MPSADBW , "mpsadbw" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A42, 0), + MAKE_INST(INST_MUL , "mul" , G(RM) , F(SPECIAL) , 0 , 0 , 4, 0x000000F6, 0), + MAKE_INST(INST_MULPD , "mulpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F59, 0), + MAKE_INST(INST_MULPS , "mulps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F59, 0), + MAKE_INST(INST_MULSD , "mulsd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000F59, 0), + MAKE_INST(INST_MULSS , "mulss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F59, 0), + MAKE_INST(INST_MWAIT , "mwait" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x000F01C9, 0), + MAKE_INST(INST_NEG , "neg" , G(RM) , F(LOCKABLE) , O(GQDWB_MEM) , 0 , 3, 0x000000F6, 0), + MAKE_INST(INST_NOP , "nop" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0x00000090, 0), + MAKE_INST(INST_NOT , "not" , G(RM) , F(LOCKABLE) , O(GQDWB_MEM) , 0 , 2, 0x000000F6, 0), + MAKE_INST(INST_OR , "or" , G(ALU) , F(LOCKABLE) , O(GQDWB_MEM) , O(GQDWB_MEM)|O(IMM) , 1, 0x00000008, 0x00000080), + MAKE_INST(INST_ORPD , "orpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F56, 0), + MAKE_INST(INST_ORPS , "orps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F56, 0), + MAKE_INST(INST_PABSB , "pabsb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F381C, 0), + MAKE_INST(INST_PABSD , "pabsd" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F381E, 0), + MAKE_INST(INST_PABSW , "pabsw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F381D, 0), + MAKE_INST(INST_PACKSSDW , "packssdw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F6B, 0), + MAKE_INST(INST_PACKSSWB , "packsswb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F63, 0), + MAKE_INST(INST_PACKUSDW , "packusdw" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F382B, 0), + 
MAKE_INST(INST_PACKUSWB , "packuswb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F67, 0), + MAKE_INST(INST_PADDB , "paddb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FFC, 0), + MAKE_INST(INST_PADDD , "paddd" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FFE, 0), + MAKE_INST(INST_PADDQ , "paddq" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FD4, 0), + MAKE_INST(INST_PADDSB , "paddsb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FEC, 0), + MAKE_INST(INST_PADDSW , "paddsw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FED, 0), + MAKE_INST(INST_PADDUSB , "paddusb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FDC, 0), + MAKE_INST(INST_PADDUSW , "paddusw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FDD, 0), + MAKE_INST(INST_PADDW , "paddw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FFD, 0), + MAKE_INST(INST_PALIGNR , "palignr" , G(MMU_RM_IMM8) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F3A0F, 0), + MAKE_INST(INST_PAND , "pand" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FDB, 0), + MAKE_INST(INST_PANDN , "pandn" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FDF, 0), + MAKE_INST(INST_PAUSE , "pause" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0xF3000090, 0), + MAKE_INST(INST_PAVGB , "pavgb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FE0, 0), + MAKE_INST(INST_PAVGW , "pavgw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FE3, 0), + MAKE_INST(INST_PBLENDVB , "pblendvb" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3810, 0), + MAKE_INST(INST_PBLENDW , "pblendw" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A0E, 0), + MAKE_INST(INST_PCMPEQB , "pcmpeqb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F74, 0), + MAKE_INST(INST_PCMPEQD , "pcmpeqd" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F76, 
0), + MAKE_INST(INST_PCMPEQQ , "pcmpeqq" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3829, 0), + MAKE_INST(INST_PCMPEQW , "pcmpeqw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F75, 0), + MAKE_INST(INST_PCMPESTRI , "pcmpestri" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A61, 0), + MAKE_INST(INST_PCMPESTRM , "pcmpestrm" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A60, 0), + MAKE_INST(INST_PCMPGTB , "pcmpgtb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F64, 0), + MAKE_INST(INST_PCMPGTD , "pcmpgtd" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F66, 0), + MAKE_INST(INST_PCMPGTQ , "pcmpgtq" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3837, 0), + MAKE_INST(INST_PCMPGTW , "pcmpgtw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F65, 0), + MAKE_INST(INST_PCMPISTRI , "pcmpistri" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A63, 0), + MAKE_INST(INST_PCMPISTRM , "pcmpistrm" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A62, 0), + MAKE_INST(INST_PEXTRB , "pextrb" , G(MMU_PEXTR) , F(NONE) , O(GD)|O(GB)|O(MEM) , O(XMM) , 0, 0x000F3A14, 0), + MAKE_INST(INST_PEXTRD , "pextrd" , G(MMU_PEXTR) , F(NONE) , O(GD) |O(MEM) , O(XMM) , 0, 0x000F3A16, 0), + MAKE_INST(INST_PEXTRQ , "pextrq" , G(MMU_PEXTR) , F(NONE) , O(GQD) |O(MEM) , O(XMM) , 1, 0x000F3A16, 0), + MAKE_INST(INST_PEXTRW , "pextrw" , G(MMU_PEXTR) , F(NONE) , O(GD) |O(MEM) , O(XMM) | O(MM) , 0, 0x000F3A16, 0), + MAKE_INST(INST_PF2ID , "pf2id" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0x1D), + MAKE_INST(INST_PF2IW , "pf2iw" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0x1C), + MAKE_INST(INST_PFACC , "pfacc" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0xAE), + MAKE_INST(INST_PFADD , "pfadd" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0x9E), + MAKE_INST(INST_PFCMPEQ , "pfcmpeq" , 
G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0xB0), + MAKE_INST(INST_PFCMPGE , "pfcmpge" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0x90), + MAKE_INST(INST_PFCMPGT , "pfcmpgt" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0xA0), + MAKE_INST(INST_PFMAX , "pfmax" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0xA4), + MAKE_INST(INST_PFMIN , "pfmin" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0x94), + MAKE_INST(INST_PFMUL , "pfmul" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0xB4), + MAKE_INST(INST_PFNACC , "pfnacc" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0x8A), + MAKE_INST(INST_PFPNACC , "pfpnacc" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0x8E), + MAKE_INST(INST_PFRCP , "pfrcp" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0x96), + MAKE_INST(INST_PFRCPIT1 , "pfrcpit1" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0xA6), + MAKE_INST(INST_PFRCPIT2 , "pfrcpit2" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0xB6), + MAKE_INST(INST_PFRSQIT1 , "pfrsqit1" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0xA7), + MAKE_INST(INST_PFRSQRT , "pfrsqrt" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0x97), + MAKE_INST(INST_PFSUB , "pfsub" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0x9A), + MAKE_INST(INST_PFSUBR , "pfsubr" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0xAA), + MAKE_INST(INST_PHADDD , "phaddd" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F3802, 0), + MAKE_INST(INST_PHADDSW , "phaddsw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F3803, 0), + MAKE_INST(INST_PHADDW , "phaddw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F3801, 0), + MAKE_INST(INST_PHMINPOSUW , "phminposuw" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 
0x660F3841, 0), + MAKE_INST(INST_PHSUBD , "phsubd" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F3806, 0), + MAKE_INST(INST_PHSUBSW , "phsubsw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F3807, 0), + MAKE_INST(INST_PHSUBW , "phsubw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F3805, 0), + MAKE_INST(INST_PI2FD , "pi2fd" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0x0D), + MAKE_INST(INST_PI2FW , "pi2fw" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0x0C), + MAKE_INST(INST_PINSRB , "pinsrb" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(GD) | O(MEM) , 0, 0x660F3A20, 0), + MAKE_INST(INST_PINSRD , "pinsrd" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(GD) | O(MEM) , 0, 0x660F3A22, 0), + MAKE_INST(INST_PINSRQ , "pinsrq" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(GQ) | O(MEM) , 0, 0x660F3A22, 0), + MAKE_INST(INST_PINSRW , "pinsrw" , G(MMU_RM_IMM8) , F(NONE) , O(MM_XMM) , O(GD) | O(MEM) , 0, 0x00000FC4, 0), + MAKE_INST(INST_PMADDUBSW , "pmaddubsw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F3804, 0), + MAKE_INST(INST_PMADDWD , "pmaddwd" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FF5, 0), + MAKE_INST(INST_PMAXSB , "pmaxsb" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F383C, 0), + MAKE_INST(INST_PMAXSD , "pmaxsd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F383D, 0), + MAKE_INST(INST_PMAXSW , "pmaxsw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FEE, 0), + MAKE_INST(INST_PMAXUB , "pmaxub" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FDE, 0), + MAKE_INST(INST_PMAXUD , "pmaxud" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F383F, 0), + MAKE_INST(INST_PMAXUW , "pmaxuw" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F383E, 0), + MAKE_INST(INST_PMINSB , "pminsb" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3838, 0), + MAKE_INST(INST_PMINSD , "pminsd" , G(MMU_RMI) , F(NONE) , O(XMM) , 
O(XMM_MEM) , 0, 0x660F3839, 0), + MAKE_INST(INST_PMINSW , "pminsw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FEA, 0), + MAKE_INST(INST_PMINUB , "pminub" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FDA, 0), + MAKE_INST(INST_PMINUD , "pminud" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F383B, 0), + MAKE_INST(INST_PMINUW , "pminuw" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F383A, 0), + MAKE_INST(INST_PMOVMSKB , "pmovmskb" , G(MMU_RMI) , F(MOV) , O(GQD) , O(MM_XMM) , 0, 0x00000FD7, 0), + MAKE_INST(INST_PMOVSXBD , "pmovsxbd" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x660F3821, 0), + MAKE_INST(INST_PMOVSXBQ , "pmovsxbq" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x660F3822, 0), + MAKE_INST(INST_PMOVSXBW , "pmovsxbw" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x660F3820, 0), + MAKE_INST(INST_PMOVSXDQ , "pmovsxdq" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x660F3825, 0), + MAKE_INST(INST_PMOVSXWD , "pmovsxwd" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x660F3823, 0), + MAKE_INST(INST_PMOVSXWQ , "pmovsxwq" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x660F3824, 0), + MAKE_INST(INST_PMOVZXBD , "pmovzxbd" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x660F3831, 0), + MAKE_INST(INST_PMOVZXBQ , "pmovzxbq" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x660F3832, 0), + MAKE_INST(INST_PMOVZXBW , "pmovzxbw" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x660F3830, 0), + MAKE_INST(INST_PMOVZXDQ , "pmovzxdq" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x660F3835, 0), + MAKE_INST(INST_PMOVZXWD , "pmovzxwd" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x660F3833, 0), + MAKE_INST(INST_PMOVZXWQ , "pmovzxwq" , G(MMU_RMI) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x660F3834, 0), + MAKE_INST(INST_PMULDQ , "pmuldq" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3828, 0), + MAKE_INST(INST_PMULHRSW , "pmulhrsw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 
0x000F380B, 0), + MAKE_INST(INST_PMULHUW , "pmulhuw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FE4, 0), + MAKE_INST(INST_PMULHW , "pmulhw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FE5, 0), + MAKE_INST(INST_PMULLD , "pmulld" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3840, 0), + MAKE_INST(INST_PMULLW , "pmullw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FD5, 0), + MAKE_INST(INST_PMULUDQ , "pmuludq" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FF4, 0), + MAKE_INST(INST_POP , "pop" , G(POP) , F(SPECIAL) , 0 , 0 , 0, 0x00000058, 0x0000008F), + MAKE_INST(INST_POPAD , "popad" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x00000061, 0), + MAKE_INST(INST_POPCNT , "popcnt" , G(R_RM) , F(NONE) , O(GQDW) , O(GQDW_MEM) , 0, 0xF3000FB8, 0), + MAKE_INST(INST_POPFD , "popfd" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x0000009D, 0), + MAKE_INST(INST_POPFQ , "popfq" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x0000009D, 0), + MAKE_INST(INST_POR , "por" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FEB, 0), + MAKE_INST(INST_PREFETCH , "prefetch" , G(MMU_PREFETCH) , F(NONE) , O(MEM) , O(IMM) , 0, 0 , 0), + MAKE_INST(INST_PSADBW , "psadbw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FF6, 0), + MAKE_INST(INST_PSHUFB , "pshufb" , G(MMU_RMI) , F(MOV) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F3800, 0), + MAKE_INST(INST_PSHUFD , "pshufd" , G(MMU_RM_IMM8) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0x66000F70, 0), + MAKE_INST(INST_PSHUFW , "pshufw" , G(MMU_RM_IMM8) , F(MOV) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F70, 0), + MAKE_INST(INST_PSHUFHW , "pshufhw" , G(MMU_RM_IMM8) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F70, 0), + MAKE_INST(INST_PSHUFLW , "pshuflw" , G(MMU_RM_IMM8) , F(MOV) , O(XMM) , O(XMM_MEM) , 0, 0xF2000F70, 0), + MAKE_INST(INST_PSIGNB , "psignb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F3808, 0), + MAKE_INST(INST_PSIGND , "psignd" , G(MMU_RMI) , F(NONE) , 
O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F380A, 0), + MAKE_INST(INST_PSIGNW , "psignw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x000F3809, 0), + MAKE_INST(INST_PSLLD , "pslld" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM)|O(IMM), 6, 0x00000FF2, 0x00000F72), + MAKE_INST(INST_PSLLDQ , "pslldq" , G(MMU_RMI) , F(NONE) , O(XMM) , O(IMM) , 7, 0 , 0x66000F73), + MAKE_INST(INST_PSLLQ , "psllq" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM)|O(IMM), 6, 0x00000FF3, 0x00000F73), + MAKE_INST(INST_PSLLW , "psllw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM)|O(IMM), 6, 0x00000FF1, 0x00000F71), + MAKE_INST(INST_PSRAD , "psrad" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM)|O(IMM), 4, 0x00000FE2, 0x00000F72), + MAKE_INST(INST_PSRAW , "psraw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM)|O(IMM), 4, 0x00000FE1, 0x00000F71), + MAKE_INST(INST_PSRLD , "psrld" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM)|O(IMM), 2, 0x00000FD2, 0x00000F72), + MAKE_INST(INST_PSRLDQ , "psrldq" , G(MMU_RMI) , F(NONE) , O(XMM) , O(IMM) , 3, 0 , 0x66000F73), + MAKE_INST(INST_PSRLQ , "psrlq" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM)|O(IMM), 2, 0x00000FD3, 0x00000F73), + MAKE_INST(INST_PSRLW , "psrlw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM)|O(IMM), 2, 0x00000FD1, 0x00000F71), + MAKE_INST(INST_PSUBB , "psubb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FF8, 0), + MAKE_INST(INST_PSUBD , "psubd" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FFA, 0), + MAKE_INST(INST_PSUBQ , "psubq" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FFB, 0), + MAKE_INST(INST_PSUBSB , "psubsb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FE8, 0), + MAKE_INST(INST_PSUBSW , "psubsw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FE9, 0), + MAKE_INST(INST_PSUBUSB , "psubusb" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FD8, 0), + MAKE_INST(INST_PSUBUSW , "psubusw" , G(MMU_RMI) , 
F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FD9, 0), + MAKE_INST(INST_PSUBW , "psubw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FF9, 0), + MAKE_INST(INST_PSWAPD , "pswapd" , G(MMU_RM_3DNOW) , F(NONE) , O(MM) , O(MM_MEM) , 0, 0x00000F0F, 0xBB), + MAKE_INST(INST_PTEST , "ptest" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3817, 0), + MAKE_INST(INST_PUNPCKHBW , "punpckhbw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F68, 0), + MAKE_INST(INST_PUNPCKHDQ , "punpckhdq" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F6A, 0), + MAKE_INST(INST_PUNPCKHQDQ , "punpckhqdq" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F6D, 0), + MAKE_INST(INST_PUNPCKHWD , "punpckhwd" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F69, 0), + MAKE_INST(INST_PUNPCKLBW , "punpcklbw" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F60, 0), + MAKE_INST(INST_PUNPCKLDQ , "punpckldq" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F62, 0), + MAKE_INST(INST_PUNPCKLQDQ , "punpcklqdq" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F6C, 0), + MAKE_INST(INST_PUNPCKLWD , "punpcklwd" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000F61, 0), + MAKE_INST(INST_PUSH , "push" , G(PUSH) , F(SPECIAL) , 0 , 0 , 6, 0x00000050, 0x000000FF), + MAKE_INST(INST_PUSHAD , "pushad" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x00000060, 0), + MAKE_INST(INST_PUSHFD , "pushfd" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x0000009C, 0), + MAKE_INST(INST_PUSHFQ , "pushfq" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x0000009C, 0), + MAKE_INST(INST_PXOR , "pxor" , G(MMU_RMI) , F(NONE) , O(MM_XMM) , O(MM_XMM_MEM) , 0, 0x00000FEF, 0), + MAKE_INST(INST_RCL , "rcl" , G(ROT) , F(SPECIAL) , O(GQDWB_MEM) , O(GB)|O(IMM) , 2, 0 , 0), + MAKE_INST(INST_RCPPS , "rcpps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F53, 0), + MAKE_INST(INST_RCPSS , "rcpss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 
0xF3000F53, 0), + MAKE_INST(INST_RCR , "rcr" , G(ROT) , F(SPECIAL) , O(GQDWB_MEM) , O(GB)|O(IMM) , 3, 0 , 0), + MAKE_INST(INST_RDTSC , "rdtsc" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x00000F31, 0), + MAKE_INST(INST_RDTSCP , "rdtscp" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x000F01F9, 0), + MAKE_INST(INST_REP_LODSB , "rep lodsb" , G(REP) , F(SPECIAL) , O(MEM) , 0 , 0, 0xF30000AC, 1 /* Size of mem */), + MAKE_INST(INST_REP_LODSD , "rep lodsd" , G(REP) , F(SPECIAL) , O(MEM) , 0 , 0, 0xF30000AC, 4 /* Size of mem */), + MAKE_INST(INST_REP_LODSQ , "rep lodsq" , G(REP) , F(SPECIAL) , O(MEM) , 0 , 0, 0xF30000AC, 8 /* Size of mem */), + MAKE_INST(INST_REP_LODSW , "rep lodsw" , G(REP) , F(SPECIAL) , O(MEM) , 0 , 0, 0xF30000AC, 2 /* Size of mem */), + MAKE_INST(INST_REP_MOVSB , "rep movsb" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF30000A4, 1 /* Size of mem */), + MAKE_INST(INST_REP_MOVSD , "rep movsd" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF30000A4, 4 /* Size of mem */), + MAKE_INST(INST_REP_MOVSQ , "rep movsq" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF30000A4, 8 /* Size of mem */), + MAKE_INST(INST_REP_MOVSW , "rep movsw" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF30000A4, 2 /* Size of mem */), + MAKE_INST(INST_REP_STOSB , "rep stosb" , G(REP) , F(SPECIAL) , O(MEM) , 0 , 0, 0xF30000AA, 1 /* Size of mem */), + MAKE_INST(INST_REP_STOSD , "rep stosd" , G(REP) , F(SPECIAL) , O(MEM) , 0 , 0, 0xF30000AA, 4 /* Size of mem */), + MAKE_INST(INST_REP_STOSQ , "rep stosq" , G(REP) , F(SPECIAL) , O(MEM) , 0 , 0, 0xF30000AA, 8 /* Size of mem */), + MAKE_INST(INST_REP_STOSW , "rep stosw" , G(REP) , F(SPECIAL) , O(MEM) , 0 , 0, 0xF30000AA, 2 /* Size of mem */), + MAKE_INST(INST_REPE_CMPSB , "repe cmpsb" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF30000A6, 1 /* Size of mem */), + MAKE_INST(INST_REPE_CMPSD , "repe cmpsd" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF30000A6, 4 /* Size of mem */), + MAKE_INST(INST_REPE_CMPSQ , "repe cmpsq" , G(REP) , F(SPECIAL) , O(MEM) 
, O(MEM) , 0, 0xF30000A6, 8 /* Size of mem */), + MAKE_INST(INST_REPE_CMPSW , "repe cmpsw" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF30000A6, 2 /* Size of mem */), + MAKE_INST(INST_REPE_SCASB , "repe scasb" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF30000AE, 1 /* Size of mem */), + MAKE_INST(INST_REPE_SCASD , "repe scasd" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF30000AE, 4 /* Size of mem */), + MAKE_INST(INST_REPE_SCASQ , "repe scasq" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF30000AE, 8 /* Size of mem */), + MAKE_INST(INST_REPE_SCASW , "repe scasw" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF30000AE, 2 /* Size of mem */), + MAKE_INST(INST_REPNE_CMPSB , "repne cmpsb" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF20000A6, 1 /* Size of mem */), + MAKE_INST(INST_REPNE_CMPSD , "repne cmpsd" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF20000A6, 4 /* Size of mem */), + MAKE_INST(INST_REPNE_CMPSQ , "repne cmpsq" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF20000A6, 8 /* Size of mem */), + MAKE_INST(INST_REPNE_CMPSW , "repne cmpsw" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF20000A6, 2 /* Size of mem */), + MAKE_INST(INST_REPNE_SCASB , "repne scasb" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF20000AE, 1 /* Size of mem */), + MAKE_INST(INST_REPNE_SCASD , "repne scasd" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF20000AE, 4 /* Size of mem */), + MAKE_INST(INST_REPNE_SCASQ , "repne scasq" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF20000AE, 8 /* Size of mem */), + MAKE_INST(INST_REPNE_SCASW , "repne scasw" , G(REP) , F(SPECIAL) , O(MEM) , O(MEM) , 0, 0xF20000AE, 2 /* Size of mem */), + MAKE_INST(INST_RET , "ret" , G(RET) , F(SPECIAL) , 0 , 0 , 0, 0 , 0), + MAKE_INST(INST_ROL , "rol" , G(ROT) , F(SPECIAL) , O(GQDWB_MEM) , O(GB)|O(IMM) , 0, 0 , 0), + MAKE_INST(INST_ROR , "ror" , G(ROT) , F(SPECIAL) , O(GQDWB_MEM) , O(GB)|O(IMM) , 1, 0 , 0), + MAKE_INST(INST_ROUNDPD , "roundpd" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 
0, 0x660F3A09, 0), + MAKE_INST(INST_ROUNDPS , "roundps" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A08, 0), + MAKE_INST(INST_ROUNDSD , "roundsd" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A0B, 0), + MAKE_INST(INST_ROUNDSS , "roundss" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x660F3A0A, 0), + MAKE_INST(INST_RSQRTPS , "rsqrtps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F52, 0), + MAKE_INST(INST_RSQRTSS , "rsqrtss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F52, 0), + MAKE_INST(INST_SAHF , "sahf" , G(EMIT) , F(SPECIAL) , 0 , 0 , 0, 0x0000009E, 0), + MAKE_INST(INST_SAL , "sal" , G(ROT) , F(SPECIAL) , O(GQDWB_MEM) , O(GB)|O(IMM) , 4, 0 , 0), + MAKE_INST(INST_SAR , "sar" , G(ROT) , F(SPECIAL) , O(GQDWB_MEM) , O(GB)|O(IMM) , 7, 0 , 0), + MAKE_INST(INST_SBB , "sbb" , G(ALU) , F(LOCKABLE) , O(GQDWB_MEM) , O(GQDWB_MEM)|O(IMM) , 3, 0x00000018, 0x00000080), + MAKE_INST(INST_SETA , "seta" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F97, 0), + MAKE_INST(INST_SETAE , "setae" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F93, 0), + MAKE_INST(INST_SETB , "setb" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F92, 0), + MAKE_INST(INST_SETBE , "setbe" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F96, 0), + MAKE_INST(INST_SETC , "setc" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F92, 0), + MAKE_INST(INST_SETE , "sete" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F94, 0), + MAKE_INST(INST_SETG , "setg" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F9F, 0), + MAKE_INST(INST_SETGE , "setge" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F9D, 0), + MAKE_INST(INST_SETL , "setl" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F9C, 0), + MAKE_INST(INST_SETLE , "setle" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F9E, 0), + MAKE_INST(INST_SETNA , "setna" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F96, 0), + MAKE_INST(INST_SETNAE , "setnae" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 
0x00000F92, 0), + MAKE_INST(INST_SETNB , "setnb" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F93, 0), + MAKE_INST(INST_SETNBE , "setnbe" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F97, 0), + MAKE_INST(INST_SETNC , "setnc" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F93, 0), + MAKE_INST(INST_SETNE , "setne" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F95, 0), + MAKE_INST(INST_SETNG , "setng" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F9E, 0), + MAKE_INST(INST_SETNGE , "setnge" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F9C, 0), + MAKE_INST(INST_SETNL , "setnl" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F9D, 0), + MAKE_INST(INST_SETNLE , "setnle" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F9F, 0), + MAKE_INST(INST_SETNO , "setno" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F91, 0), + MAKE_INST(INST_SETNP , "setnp" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F9B, 0), + MAKE_INST(INST_SETNS , "setns" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F99, 0), + MAKE_INST(INST_SETNZ , "setnz" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F95, 0), + MAKE_INST(INST_SETO , "seto" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F90, 0), + MAKE_INST(INST_SETP , "setp" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F9A, 0), + MAKE_INST(INST_SETPE , "setpe" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F9A, 0), + MAKE_INST(INST_SETPO , "setpo" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F9B, 0), + MAKE_INST(INST_SETS , "sets" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F98, 0), + MAKE_INST(INST_SETZ , "setz" , G(RM_B) , F(NONE) , O(GB_MEM) , 0 , 0, 0x00000F94, 0), + MAKE_INST(INST_SFENCE , "sfence" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0x000FAEF8, 0), + MAKE_INST(INST_SHL , "shl" , G(ROT) , F(SPECIAL) , O(GQDWB_MEM) , O(GB)|O(IMM) , 4, 0 , 0), + MAKE_INST(INST_SHLD , "shld" , G(SHLD_SHRD) , F(SPECIAL) , O(GQDWB_MEM) , O(GB) , 0, 0x00000FA4, 0), + MAKE_INST(INST_SHR , "shr" , G(ROT) , F(SPECIAL) , O(GQDWB_MEM) , O(GB)|O(IMM) , 
5, 0 , 0), + MAKE_INST(INST_SHRD , "shrd" , G(SHLD_SHRD) , F(SPECIAL) , O(GQDWB_MEM) , O(GQDWB) , 0, 0x00000FAC, 0), + MAKE_INST(INST_SHUFPD , "shufpd" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000FC6, 0), + MAKE_INST(INST_SHUFPS , "shufps" , G(MMU_RM_IMM8) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000FC6, 0), + MAKE_INST(INST_SQRTPD , "sqrtpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F51, 0), + MAKE_INST(INST_SQRTPS , "sqrtps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F51, 0), + MAKE_INST(INST_SQRTSD , "sqrtsd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000F51, 0), + MAKE_INST(INST_SQRTSS , "sqrtss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F51, 0), + MAKE_INST(INST_STC , "stc" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0x000000F9, 0), + MAKE_INST(INST_STD , "std" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0x000000FD, 0), + MAKE_INST(INST_STMXCSR , "stmxcsr" , G(M) , F(NONE) , O(MEM) , 0 , 3, 0x00000FAE, 0), + MAKE_INST(INST_SUB , "sub" , G(ALU) , F(LOCKABLE) , O(GQDWB_MEM) , O(GQDWB_MEM)|O(IMM) , 5, 0x00000028, 0x00000080), + MAKE_INST(INST_SUBPD , "subpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F5C, 0), + MAKE_INST(INST_SUBPS , "subps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F5C, 0), + MAKE_INST(INST_SUBSD , "subsd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF2000F5C, 0), + MAKE_INST(INST_SUBSS , "subss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0xF3000F5C, 0), + MAKE_INST(INST_TEST , "test" , G(TEST) , F(NONE) , O(GQDWB_MEM) , O(GQDWB)|O(IMM) , 0, 0 , 0), + MAKE_INST(INST_UCOMISD , "ucomisd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F2E, 0), + MAKE_INST(INST_UCOMISS , "ucomiss" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F2E, 0), + MAKE_INST(INST_UD2 , "ud2" , G(EMIT) , F(NONE) , 0 , 0 , 0, 0x00000F0B, 0), + MAKE_INST(INST_UNPCKHPD , "unpckhpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F15, 0), + 
MAKE_INST(INST_UNPCKHPS , "unpckhps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F15, 0), + MAKE_INST(INST_UNPCKLPD , "unpcklpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F14, 0), + MAKE_INST(INST_UNPCKLPS , "unpcklps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F14, 0), + MAKE_INST(INST_XADD , "xadd" , G(RM_R) , F(LOCKABLE) , O(GQDWB_MEM) , O(GQDWB) , 0, 0x00000FC0, 0), + MAKE_INST(INST_XCHG , "xchg" , G(XCHG) , F(LOCKABLE) , O(GQDWB_MEM) , O(GQDWB) , 0, 0 , 0), + MAKE_INST(INST_XOR , "xor" , G(ALU) , F(LOCKABLE) , O(GQDWB_MEM) , O(GQDWB_MEM)|O(IMM) , 6, 0x00000030, 0x00000080), + MAKE_INST(INST_XORPD , "xorpd" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x66000F57, 0), + MAKE_INST(INST_XORPS , "xorps" , G(MMU_RMI) , F(NONE) , O(XMM) , O(XMM_MEM) , 0, 0x00000F57, 0) +}; + +#undef G +#undef F +#undef O + +#undef MAKE_INST + +} // AsmJit namespace + +#include "ApiEnd.h" diff --git a/lib/AsmJit/DefsX86X64.h b/lib/AsmJit/DefsX86X64.h new file mode 100644 index 0000000..9ac0e74 --- /dev/null +++ b/lib/AsmJit/DefsX86X64.h @@ -0,0 +1,1953 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Guard] +#ifndef _ASMJIT_DEFSX86X64_H +#define _ASMJIT_DEFSX86X64_H + +#if !defined(_ASMJIT_DEFS_H) +#warning "AsmJit/DefsX86X64.h can be only included by AsmJit/Defs.h" +#endif // _ASMJIT_DEFS_H + +// [Dependencies] +#include "Build.h" +#include "Util.h" + +#include <stdlib.h> +#include <string.h> + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +//! @addtogroup AsmJit_Core +//! @{ + +// ============================================================================ +// [AsmJit::REG_NUM] +// ============================================================================ + +//! @var REG_NUM +//! @brief Count of General purpose registers and XMM registers. +//! +//! Count of general purpose registers and XMM registers depends to current +//! bit-mode. If application is compiled for 32-bit platform then this number +//! is 8, 64-bit platforms have 8 extra general purpose and xmm registers (16 +//! total). + +//! @brief Count of registers. +enum REG_NUM +{ + //! @var REG_NUM_BASE + //! + //! Count of general purpose registers and XMM registers depends to current + //! bit-mode. If application is compiled for 32-bit platform then this number + //! is 8, 64-bit platforms have 8 extra general purpose and xmm registers (16 + //! total). +#if defined(ASMJIT_X86) + REG_NUM_BASE = 8, +#else + REG_NUM_BASE = 16, +#endif // ASMJIT + + //! @brief Count of general purpose registers. + //! + //! 8 in 32-bit mode and 16 in 64-bit mode. + REG_NUM_GP = REG_NUM_BASE, + + //! 
@brief Count of MM registers (always 8). + REG_NUM_MM = 8, + + //! @brief Count of FPU stack registers (always 8). + REG_NUM_FPU = 8, + + //! @brief Count of XMM registers. + //! + //! 8 in 32-bit mode and 16 in 64-bit mode. + REG_NUM_XMM = REG_NUM_BASE +}; + +// ============================================================================ +// [AsmJit::REG_INDEX] +// ============================================================================ + +//! @brief Valid X86 register indexes. +//! +//! These codes are real, don't miss with @c REG enum! and don't use these +//! values if you are not writing AsmJit code. +enum REG_INDEX +{ + //! @brief Mask for register code (index). + REG_INDEX_MASK = 0x00FF, + + //! @brief ID for AX/EAX/RAX registers. + REG_INDEX_EAX = 0, + //! @brief ID for CX/ECX/RCX registers. + REG_INDEX_ECX = 1, + //! @brief ID for DX/EDX/RDX registers. + REG_INDEX_EDX = 2, + //! @brief ID for BX/EBX/RBX registers. + REG_INDEX_EBX = 3, + //! @brief ID for SP/ESP/RSP registers. + REG_INDEX_ESP = 4, + //! @brief ID for BP/EBP/RBP registers. + REG_INDEX_EBP = 5, + //! @brief ID for SI/ESI/RSI registers. + REG_INDEX_ESI = 6, + //! @brief ID for DI/EDI/RDI registers. + REG_INDEX_EDI = 7, + +#if defined(ASMJIT_X64) + //! @brief ID for AX/EAX/RAX registers. + REG_INDEX_RAX = 0, + //! @brief ID for CX/ECX/RCX registers. + REG_INDEX_RCX = 1, + //! @brief ID for DX/EDX/RDX registers. + REG_INDEX_RDX = 2, + //! @brief ID for BX/EBX/RBX registers. + REG_INDEX_RBX = 3, + //! @brief ID for SP/ESP/RSP registers. + REG_INDEX_RSP = 4, + //! @brief ID for BP/EBP/RBP registers. + REG_INDEX_RBP = 5, + //! @brief ID for SI/ESI/RSI registers. + REG_INDEX_RSI = 6, + //! @brief ID for DI/EDI/RDI registers. + REG_INDEX_RDI = 7, + + //! @brief ID for r8 register (additional register introduced by 64-bit architecture). + REG_INDEX_R8 = 8, + //! @brief ID for R9 register (additional register introduced by 64-bit architecture). + REG_INDEX_R9 = 9, + //! 
@brief ID for R10 register (additional register introduced by 64-bit architecture). + REG_INDEX_R10 = 10, + //! @brief ID for R11 register (additional register introduced by 64-bit architecture). + REG_INDEX_R11 = 11, + //! @brief ID for R12 register (additional register introduced by 64-bit architecture). + REG_INDEX_R12 = 12, + //! @brief ID for R13 register (additional register introduced by 64-bit architecture). + REG_INDEX_R13 = 13, + //! @brief ID for R14 register (additional register introduced by 64-bit architecture). + REG_INDEX_R14 = 14, + //! @brief ID for R15 register (additional register introduced by 64-bit architecture). + REG_INDEX_R15 = 15, +#endif // ASMJIT_X64 + + //! @brief ID for mm0 register. + REG_INDEX_MM0 = 0, + //! @brief ID for mm1 register. + REG_INDEX_MM1 = 1, + //! @brief ID for mm2 register. + REG_INDEX_MM2 = 2, + //! @brief ID for mm3 register. + REG_INDEX_MM3 = 3, + //! @brief ID for mm4 register. + REG_INDEX_MM4 = 4, + //! @brief ID for mm5 register. + REG_INDEX_MM5 = 5, + //! @brief ID for mm6 register. + REG_INDEX_MM6 = 6, + //! @brief ID for mm7 register. + REG_INDEX_MM7 = 7, + + //! @brief ID for xmm0 register. + REG_INDEX_XMM0 = 0, + //! @brief ID for xmm1 register. + REG_INDEX_XMM1 = 1, + //! @brief ID for xmm2 register. + REG_INDEX_XMM2 = 2, + //! @brief ID for xmm3 register. + REG_INDEX_XMM3 = 3, + //! @brief ID for xmm4 register. + REG_INDEX_XMM4 = 4, + //! @brief ID for xmm5 register. + REG_INDEX_XMM5 = 5, + //! @brief ID for xmm6 register. + REG_INDEX_XMM6 = 6, + //! @brief ID for xmm7 register. + REG_INDEX_XMM7 = 7 + +#if defined(ASMJIT_X64) + , + + //! @brief ID for xmm8 register (additional register introduced by 64-bit architecture). + REG_INDEX_XMM8 = 8, + //! @brief ID for xmm9 register (additional register introduced by 64-bit architecture). + REG_INDEX_XMM9 = 9, + //! @brief ID for xmm10 register (additional register introduced by 64-bit architecture). + REG_INDEX_XMM10 = 10, + //! 
@brief ID for xmm11 register (additional register introduced by 64-bit architecture). + REG_INDEX_XMM11 = 11, + //! @brief ID for xmm12 register (additional register introduced by 64-bit architecture). + REG_INDEX_XMM12 = 12, + //! @brief ID for xmm13 register (additional register introduced by 64-bit architecture). + REG_INDEX_XMM13 = 13, + //! @brief ID for xmm14 register (additional register introduced by 64-bit architecture). + REG_INDEX_XMM14 = 14, + //! @brief ID for xmm15 register (additional register introduced by 64-bit architecture). + REG_INDEX_XMM15 = 15 +#endif // ASMJIT_X64 +}; + +// ============================================================================ +// [AsmJit::REG_TYPE] +// ============================================================================ + +//! @brief Pseudo (not real X86) register types. +enum REG_TYPE +{ + //! @brief Mask for register type. + REG_TYPE_MASK = 0xFF00, + + // First byte contains register type (mask 0xFF00), Second byte contains + // register index code. + + // -------------------------------------------------------------------------- + // [GP Register Types] + // -------------------------------------------------------------------------- + + //! @brief 8-bit general purpose register type (LO). + REG_TYPE_GPB_LO = 0x0100, + //! @brief 8-bit general purpose register type (HI, only AH, BH, CH, DH). + REG_TYPE_GPB_HI = 0x0200, + //! @brief 16-bit general purpose register type. + REG_TYPE_GPW = 0x1000, + //! @brief 32-bit general purpose register type. + REG_TYPE_GPD = 0x2000, + //! @brief 64-bit general purpose register type. + REG_TYPE_GPQ = 0x3000, + + //! @var REG_GPN + //! @brief 32-bit or 64-bit general purpose register type. + + // native 32-bit or 64-bit register type (depends to x86 or x64 mode). 
+#if defined(ASMJIT_X86) + REG_TYPE_GPN = REG_TYPE_GPD, +#else + REG_TYPE_GPN = REG_TYPE_GPQ, +#endif + + // -------------------------------------------------------------------------- + // [X87 (FPU) Register Type] + // -------------------------------------------------------------------------- + + //! @brief X87 (FPU) register type. + REG_TYPE_X87 = 0x5000, + + // -------------------------------------------------------------------------- + // [MM Register Type] + // -------------------------------------------------------------------------- + + //! @brief 64-bit MM register type. + REG_TYPE_MM = 0x6000, + + // -------------------------------------------------------------------------- + // [XMM Register Type] + // -------------------------------------------------------------------------- + + //! @brief 128-bit XMM register type. + REG_TYPE_XMM = 0x7000 +}; + +// ============================================================================ +// [AsmJit::REG_CODE] +// ============================================================================ + +//! @brief Pseudo (not real X86) register codes used for generating opcodes. +//! +//! From this register code can be generated real x86 register ID, type of +//! register and size of register. 
+enum REG_CODE +{ + // -------------------------------------------------------------------------- + // [8-bit Registers] + // -------------------------------------------------------------------------- + + REG_AL = REG_TYPE_GPB_LO, + REG_CL, + REG_DL, + REG_BL, +#if defined(ASMJIT_X64) + REG_SPL, + REG_BPL, + REG_SIL, + REG_DIL, +#endif // ASMJIT_X64 + +#if defined(ASMJIT_X64) + REG_R8B, + REG_R9B, + REG_R10B, + REG_R11B, + REG_R12B, + REG_R13B, + REG_R14B, + REG_R15B, +#endif // ASMJIT_X64 + + REG_AH = REG_TYPE_GPB_HI, + REG_CH, + REG_DH, + REG_BH, + + // -------------------------------------------------------------------------- + // [16-bit Registers] + // -------------------------------------------------------------------------- + + REG_AX = REG_TYPE_GPW, + REG_CX, + REG_DX, + REG_BX, + REG_SP, + REG_BP, + REG_SI, + REG_DI, +#if defined(ASMJIT_X64) + REG_R8W, + REG_R9W, + REG_R10W, + REG_R11W, + REG_R12W, + REG_R13W, + REG_R14W, + REG_R15W, +#endif // ASMJIT_X64 + + // -------------------------------------------------------------------------- + // [32-bit Registers] + // -------------------------------------------------------------------------- + + REG_EAX = REG_TYPE_GPD, + REG_ECX, + REG_EDX, + REG_EBX, + REG_ESP, + REG_EBP, + REG_ESI, + REG_EDI, +#if defined(ASMJIT_X64) + REG_R8D, + REG_R9D, + REG_R10D, + REG_R11D, + REG_R12D, + REG_R13D, + REG_R14D, + REG_R15D, +#endif // ASMJIT_X64 + + // -------------------------------------------------------------------------- + // [64-bit Registers] + // -------------------------------------------------------------------------- + +#if defined(ASMJIT_X64) + REG_RAX = REG_TYPE_GPQ, + REG_RCX, + REG_RDX, + REG_RBX, + REG_RSP, + REG_RBP, + REG_RSI, + REG_RDI, + REG_R8, + REG_R9, + REG_R10, + REG_R11, + REG_R12, + REG_R13, + REG_R14, + REG_R15, +#endif // ASMJIT_X64 + + // -------------------------------------------------------------------------- + // [MM Registers] + // 
-------------------------------------------------------------------------- + + REG_MM0 = REG_TYPE_MM, + REG_MM1, + REG_MM2, + REG_MM3, + REG_MM4, + REG_MM5, + REG_MM6, + REG_MM7, + + // -------------------------------------------------------------------------- + // [XMM Registers] + // -------------------------------------------------------------------------- + + REG_XMM0 = REG_TYPE_XMM, + REG_XMM1, + REG_XMM2, + REG_XMM3, + REG_XMM4, + REG_XMM5, + REG_XMM6, + REG_XMM7, +#if defined(ASMJIT_X64) + REG_XMM8, + REG_XMM9, + REG_XMM10, + REG_XMM11, + REG_XMM12, + REG_XMM13, + REG_XMM14, + REG_XMM15, +#endif // ASMJIT_X64 + + // -------------------------------------------------------------------------- + // [Native registers (depends if processor runs in 32-bit or 64-bit mode)] + // -------------------------------------------------------------------------- + + REG_NAX = REG_TYPE_GPN, + REG_NCX, + REG_NDX, + REG_NBX, + REG_NSP, + REG_NBP, + REG_NSI, + REG_NDI +}; + +// ============================================================================ +// [AsmJit::SEGMENT_PREFIX] +// ============================================================================ + +//! @brief Segment override prefixes. +enum SEGMENT_PREFIX +{ + // DO NOT MODIFY INDEX CODES - They are used by logger in this order. + + //! @brief No segment override prefix. + SEGMENT_NONE = 0, + //! @brief Use 'cs' segment override prefix. + SEGMENT_CS = 1, + //! @brief Use 'ss' segment override prefix. + SEGMENT_SS = 2, + //! @brief Use 'ds' segment override prefix. + SEGMENT_DS = 3, + //! @brief Use 'es' segment override prefix. + SEGMENT_ES = 4, + //! @brief Use 'fs' segment override prefix. + SEGMENT_FS = 5, + //! @brief Use 'gs' segment override prefix. + SEGMENT_GS = 6, + //! 
@brief End of prefix codes + _SEGMENT_END +}; + +// ============================================================================ +// [AsmJit::PREFETCH_HINT] +// ============================================================================ + +//! @brief Prefetch hints. +enum PREFETCH_HINT +{ + //! @brief Prefetch to L0 cache. + PREFETCH_T0 = 1, + //! @brief Prefetch to L1 cache. + PREFETCH_T1 = 2, + //! @brief Prefetch to L2 cache. + PREFETCH_T2 = 3, + //! @brief Prefetch using NT hint. + PREFETCH_NTA = 0 +}; + +// ============================================================================ +// [AsmJit::CONDITION] +// ============================================================================ + +//! @brief Condition codes. +enum CONDITION +{ + //! @brief No condition code. + C_NO_CONDITION = -1, + + // Condition codes from processor manuals. + C_A = 0x7, + C_AE = 0x3, + C_B = 0x2, + C_BE = 0x6, + C_C = 0x2, + C_E = 0x4, + C_G = 0xF, + C_GE = 0xD, + C_L = 0xC, + C_LE = 0xE, + C_NA = 0x6, + C_NAE = 0x2, + C_NB = 0x3, + C_NBE = 0x7, + C_NC = 0x3, + C_NE = 0x5, + C_NG = 0xE, + C_NGE = 0xC, + C_NL = 0xD, + C_NLE = 0xF, + C_NO = 0x1, + C_NP = 0xB, + C_NS = 0x9, + C_NZ = 0x5, + C_O = 0x0, + C_P = 0xA, + C_PE = 0xA, + C_PO = 0xB, + C_S = 0x8, + C_Z = 0x4, + + // Simplified condition codes + C_OVERFLOW = 0x0, + C_NO_OVERFLOW = 0x1, + C_BELOW = 0x2, + C_ABOVE_EQUAL = 0x3, + C_EQUAL = 0x4, + C_NOT_EQUAL = 0x5, + C_BELOW_EQUAL = 0x6, + C_ABOVE = 0x7, + C_SIGN = 0x8, + C_NOT_SIGN = 0x9, + C_PARITY_EVEN = 0xA, + C_PARITY_ODD = 0xB, + C_LESS = 0xC, + C_GREATER_EQUAL = 0xD, + C_LESS_EQUAL = 0xE, + C_GREATER = 0xF, + + // aliases + C_ZERO = 0x4, + C_NOT_ZERO = 0x5, + C_NEGATIVE = 0x8, + C_POSITIVE = 0x9, + + // x87 floating point only + C_FP_UNORDERED = 16, + C_FP_NOT_UNORDERED = 17 +}; + +//! @brief Returns the equivalent of !cc. +//! +//! Negation of the default no_condition (-1) results in a non-default +//! no_condition value (-2). As long as tests for no_condition check +//! 
for condition < 0, this will work as expected. +static inline CONDITION negateCondition(CONDITION cc) +{ + return static_cast<CONDITION>(cc ^ 1); +} + +//! @brief Corresponds to transposing the operands of a comparison. +static inline CONDITION reverseCondition(CONDITION cc) +{ + switch (cc) { + case C_BELOW: + return C_ABOVE; + case C_ABOVE: + return C_BELOW; + case C_ABOVE_EQUAL: + return C_BELOW_EQUAL; + case C_BELOW_EQUAL: + return C_ABOVE_EQUAL; + case C_LESS: + return C_GREATER; + case C_GREATER: + return C_LESS; + case C_GREATER_EQUAL: + return C_LESS_EQUAL; + case C_LESS_EQUAL: + return C_GREATER_EQUAL; + default: + return cc; + }; +} + +struct ASMJIT_API ConditionToInstruction +{ + //! @brief Used to map condition code to jcc instructions. + static const uint32_t _jcctable[16]; + //! @brief Used to map condition code to cmovcc instructions. + static const uint32_t _cmovcctable[16]; + //! @brief Used to map condition code to setcc instructions. + static const uint32_t _setcctable[16]; + + //! @brief Translate condition code @a cc to jcc instruction code. + //! @sa @c INST_CODE, @c INST_J. + static inline uint32_t toJCC(CONDITION cc) ASMJIT_NOTHROW + { + ASMJIT_ASSERT(static_cast<uint32_t>(cc) <= 0xF); + return _jcctable[cc]; + } + + //! @brief Translate condition code @a cc to cmovcc instruction code. + //! @sa @c INST_CODE, @c INST_CMOV. + static inline uint32_t toCMovCC(CONDITION cc) ASMJIT_NOTHROW + { + ASMJIT_ASSERT(static_cast<uint32_t>(cc) <= 0xF); + return _cmovcctable[cc]; + } + + //! @brief Translate condition code @a cc to setcc instruction code. + //! @sa @c INST_CODE, @c INST_SET. + static inline uint32_t toSetCC(CONDITION cc) ASMJIT_NOTHROW + { + ASMJIT_ASSERT(static_cast<uint32_t>(cc) <= 0xF); + return _setcctable[cc]; + } +}; + +// ============================================================================ +// [AsmJit::SCALE] +// ============================================================================ + +//! 
@brief Scale, can be used for addressing. +//! +//! See @c Op and addressing methods like @c byte_ptr(), @c word_ptr(), +//! @c dword_ptr(), etc... +enum SCALE +{ + //! @brief Scale 1 times (no scale). + TIMES_1 = 0, + //! @brief Scale 2 times (same as shifting to left by 1). + TIMES_2 = 1, + //! @brief Scale 4 times (same as shifting to left by 2). + TIMES_4 = 2, + //! @brief Scale 8 times (same as shifting to left by 3). + TIMES_8 = 3 +}; + +// ============================================================================ +// [AsmJit::HINT] +// ============================================================================ + +//! @brief Condition hint, see @c AsmJit::Assembler::jz(), @c AsmJit::Compiler::jz() +//! and friends. +enum HINT +{ + //! @brief No hint. + HINT_NONE = 0x00, + //! @brief Condition will be taken (likely). + HINT_TAKEN = 0x01, + //! @brief Condition will be not taken (unlikely). + HINT_NOT_TAKEN = 0x02 +}; + +//! @brief Hint byte value is the byte that will be emitted if hint flag +//! is specified by @c HINT. +enum HINT_BYTE_VALUE +{ + //! @brief Condition will be taken (likely). + HINT_BYTE_VALUE_TAKEN = 0x3E, + //! @brief Condition will be not taken (unlikely). + HINT_BYTE_VALUE_NOT_TAKEN = 0x2E +}; + +// ============================================================================ +// [AsmJit::FP_STATUS] +// ============================================================================ + +//! @brief Floating point status. +enum FP_STATUS +{ + FP_C0 = 0x100, + FP_C1 = 0x200, + FP_C2 = 0x400, + FP_C3 = 0x4000, + FP_CC_MASK = 0x4500 +}; + +// ============================================================================ +// [AsmJit::FP_CW] +// ============================================================================ + +//! @brief Floating point control word. 
+enum FP_CW +{ + FP_CW_INVOPEX_MASK = 0x001, + FP_CW_DENOPEX_MASK = 0x002, + FP_CW_ZERODIV_MASK = 0x004, + FP_CW_OVFEX_MASK = 0x008, + FP_CW_UNDFEX_MASK = 0x010, + FP_CW_PRECEX_MASK = 0x020, + FP_CW_PRECC_MASK = 0x300, + FP_CW_ROUNDC_MASK = 0xC00, + + // Values for precision control. + FP_CW_PREC_SINGLE = 0x000, + FP_CW_PREC_DOUBLE = 0x200, + FP_CW_PREC_EXTENDED = 0x300, + + // Values for rounding control. + FP_CW_ROUND_NEAREST = 0x000, + FP_CW_ROUND_DOWN = 0x400, + FP_CW_ROUND_UP = 0x800, + FP_CW_ROUND_TOZERO = 0xC00 +}; + +// ============================================================================ +// [AsmJit::INST_CODE] +// ============================================================================ + +//! @brief Instruction codes. +//! +//! Note that these instruction codes are AsmJit specific. Each instruction is +//! unique ID into AsmJit instruction table. Instruction codes are used together +//! with AsmJit::Assembler and you can also use instruction codes to serialize +//! instructions by @ref AssemblerCore::_emitInstruction() or +//! 
@ref CompilerCore::_emitInstruction() +enum INST_CODE +{ + INST_ADC, // X86/X64 + INST_ADD, // X86/X64 + INST_ADDPD, + INST_ADDPS, + INST_ADDSD, + INST_ADDSS, + INST_ADDSUBPD, + INST_ADDSUBPS, + INST_AMD_PREFETCH, + INST_AMD_PREFETCHW, + INST_AND, // X86/X64 + INST_ANDNPD, + INST_ANDNPS, + INST_ANDPD, + INST_ANDPS, + INST_BLENDPD, + INST_BLENDPS, + INST_BLENDVPD, + INST_BLENDVPS, + INST_BSF, // X86/X64 + INST_BSR, // X86/X64 + INST_BSWAP, // X86/X64 (i486) + INST_BT, // X86/X64 + INST_BTC, // X86/X64 + INST_BTR, // X86/X64 + INST_BTS, // X86/X64 + INST_CALL, // X86/X64 + INST_CBW, // X86/X64 + INST_CDQE, // X64 only + INST_CLC, // X86/X64 + INST_CLD, // X86/X64 + INST_CLFLUSH, + INST_CMC, // X86/X64 + + INST_CMOV, // Begin (cmovcc) (i586) + INST_CMOVA = INST_CMOV, //X86/X64 (cmovcc) (i586) + INST_CMOVAE, // X86/X64 (cmovcc) (i586) + INST_CMOVB, // X86/X64 (cmovcc) (i586) + INST_CMOVBE, // X86/X64 (cmovcc) (i586) + INST_CMOVC, // X86/X64 (cmovcc) (i586) + INST_CMOVE, // X86/X64 (cmovcc) (i586) + INST_CMOVG, // X86/X64 (cmovcc) (i586) + INST_CMOVGE, // X86/X64 (cmovcc) (i586) + INST_CMOVL, // X86/X64 (cmovcc) (i586) + INST_CMOVLE, // X86/X64 (cmovcc) (i586) + INST_CMOVNA, // X86/X64 (cmovcc) (i586) + INST_CMOVNAE, // X86/X64 (cmovcc) (i586) + INST_CMOVNB, // X86/X64 (cmovcc) (i586) + INST_CMOVNBE, // X86/X64 (cmovcc) (i586) + INST_CMOVNC, // X86/X64 (cmovcc) (i586) + INST_CMOVNE, // X86/X64 (cmovcc) (i586) + INST_CMOVNG, // X86/X64 (cmovcc) (i586) + INST_CMOVNGE, // X86/X64 (cmovcc) (i586) + INST_CMOVNL, // X86/X64 (cmovcc) (i586) + INST_CMOVNLE, // X86/X64 (cmovcc) (i586) + INST_CMOVNO, // X86/X64 (cmovcc) (i586) + INST_CMOVNP, // X86/X64 (cmovcc) (i586) + INST_CMOVNS, // X86/X64 (cmovcc) (i586) + INST_CMOVNZ, // X86/X64 (cmovcc) (i586) + INST_CMOVO, // X86/X64 (cmovcc) (i586) + INST_CMOVP, // X86/X64 (cmovcc) (i586) + INST_CMOVPE, // X86/X64 (cmovcc) (i586) + INST_CMOVPO, // X86/X64 (cmovcc) (i586) + INST_CMOVS, // X86/X64 (cmovcc) (i586) + INST_CMOVZ, // X86/X64 
(cmovcc) (i586) + + INST_CMP, // X86/X64 + INST_CMPPD, + INST_CMPPS, + INST_CMPSD, + INST_CMPSS, + INST_CMPXCHG, // X86/X64 (i486) + INST_CMPXCHG16B, // X64 only + INST_CMPXCHG8B, // X86/X64 (i586) + INST_COMISD, + INST_COMISS, + INST_CPUID, // X86/X64 (i486) + INST_CRC32, + INST_CVTDQ2PD, + INST_CVTDQ2PS, + INST_CVTPD2DQ, + INST_CVTPD2PI, + INST_CVTPD2PS, + INST_CVTPI2PD, + INST_CVTPI2PS, + INST_CVTPS2DQ, + INST_CVTPS2PD, + INST_CVTPS2PI, + INST_CVTSD2SI, + INST_CVTSD2SS, + INST_CVTSI2SD, + INST_CVTSI2SS, + INST_CVTSS2SD, + INST_CVTSS2SI, + INST_CVTTPD2DQ, + INST_CVTTPD2PI, + INST_CVTTPS2DQ, + INST_CVTTPS2PI, + INST_CVTTSD2SI, + INST_CVTTSS2SI, + INST_CWDE, // X86/X64 + INST_DAA, // X86 only + INST_DAS, // X86 only + INST_DEC, // X86/X64 + INST_DIV, // X86/X64 + INST_DIVPD, + INST_DIVPS, + INST_DIVSD, + INST_DIVSS, + INST_DPPD, + INST_DPPS, + INST_EMMS, // MMX + INST_ENTER, // X86/X64 + INST_EXTRACTPS, + INST_F2XM1, // X87 + INST_FABS, // X87 + INST_FADD, // X87 + INST_FADDP, // X87 + INST_FBLD, // X87 + INST_FBSTP, // X87 + INST_FCHS, // X87 + INST_FCLEX, // X87 + INST_FCMOVB, // X87 + INST_FCMOVBE, // X87 + INST_FCMOVE, // X87 + INST_FCMOVNB, // X87 + INST_FCMOVNBE, // X87 + INST_FCMOVNE, // X87 + INST_FCMOVNU, // X87 + INST_FCMOVU, // X87 + INST_FCOM, // X87 + INST_FCOMI, // X87 + INST_FCOMIP, // X87 + INST_FCOMP, // X87 + INST_FCOMPP, // X87 + INST_FCOS, // X87 + INST_FDECSTP, // X87 + INST_FDIV, // X87 + INST_FDIVP, // X87 + INST_FDIVR, // X87 + INST_FDIVRP, // X87 + INST_FEMMS, // 3dNow! 
+ INST_FFREE, // X87 + INST_FIADD, // X87 + INST_FICOM, // X87 + INST_FICOMP, // X87 + INST_FIDIV, // X87 + INST_FIDIVR, // X87 + INST_FILD, // X87 + INST_FIMUL, // X87 + INST_FINCSTP, // X87 + INST_FINIT, // X87 + INST_FIST, // X87 + INST_FISTP, // X87 + INST_FISTTP, + INST_FISUB, // X87 + INST_FISUBR, // X87 + INST_FLD, // X87 + INST_FLD1, // X87 + INST_FLDCW, // X87 + INST_FLDENV, // X87 + INST_FLDL2E, // X87 + INST_FLDL2T, // X87 + INST_FLDLG2, // X87 + INST_FLDLN2, // X87 + INST_FLDPI, // X87 + INST_FLDZ, // X87 + INST_FMUL, // X87 + INST_FMULP, // X87 + INST_FNCLEX, // X87 + INST_FNINIT, // X87 + INST_FNOP, // X87 + INST_FNSAVE, // X87 + INST_FNSTCW, // X87 + INST_FNSTENV, // X87 + INST_FNSTSW, // X87 + INST_FPATAN, // X87 + INST_FPREM, // X87 + INST_FPREM1, // X87 + INST_FPTAN, // X87 + INST_FRNDINT, // X87 + INST_FRSTOR, // X87 + INST_FSAVE, // X87 + INST_FSCALE, // X87 + INST_FSIN, // X87 + INST_FSINCOS, // X87 + INST_FSQRT, // X87 + INST_FST, // X87 + INST_FSTCW, // X87 + INST_FSTENV, // X87 + INST_FSTP, // X87 + INST_FSTSW, // X87 + INST_FSUB, // X87 + INST_FSUBP, // X87 + INST_FSUBR, // X87 + INST_FSUBRP, // X87 + INST_FTST, // X87 + INST_FUCOM, // X87 + INST_FUCOMI, // X87 + INST_FUCOMIP, // X87 + INST_FUCOMP, // X87 + INST_FUCOMPP, // X87 + INST_FWAIT, // X87 + INST_FXAM, // X87 + INST_FXCH, // X87 + INST_FXRSTOR, // X87 + INST_FXSAVE, // X87 + INST_FXTRACT, // X87 + INST_FYL2X, // X87 + INST_FYL2XP1, // X87 + INST_HADDPD, + INST_HADDPS, + INST_HSUBPD, + INST_HSUBPS, + INST_IDIV, // X86/X64 + INST_IMUL, // X86/X64 + INST_INC, // X86/X64 + INST_INT3, // X86/X64 + INST_J, // Begin (jcc) + INST_JA = + INST_J, // X86/X64 (jcc) + INST_JAE, // X86/X64 (jcc) + INST_JB, // X86/X64 (jcc) + INST_JBE, // X86/X64 (jcc) + INST_JC, // X86/X64 (jcc) + INST_JE, // X86/X64 (jcc) + INST_JG, // X86/X64 (jcc) + INST_JGE, // X86/X64 (jcc) + INST_JL, // X86/X64 (jcc) + INST_JLE, // X86/X64 (jcc) + INST_JNA, // X86/X64 (jcc) + INST_JNAE, // X86/X64 (jcc) + INST_JNB, // 
X86/X64 (jcc) + INST_JNBE, // X86/X64 (jcc) + INST_JNC, // X86/X64 (jcc) + INST_JNE, // X86/X64 (jcc) + INST_JNG, // X86/X64 (jcc) + INST_JNGE, // X86/X64 (jcc) + INST_JNL, // X86/X64 (jcc) + INST_JNLE, // X86/X64 (jcc) + INST_JNO, // X86/X64 (jcc) + INST_JNP, // X86/X64 (jcc) + INST_JNS, // X86/X64 (jcc) + INST_JNZ, // X86/X64 (jcc) + INST_JO, // X86/X64 (jcc) + INST_JP, // X86/X64 (jcc) + INST_JPE, // X86/X64 (jcc) + INST_JPO, // X86/X64 (jcc) + INST_JS, // X86/X64 (jcc) + INST_JZ, // X86/X64 (jcc) + INST_JMP, // X86/X64 (jmp) + INST_LDDQU, + INST_LDMXCSR, + INST_LAHF, // X86/X64 (CPUID NEEDED) + INST_LEA, // X86/X64 + INST_LEAVE, // X86/X64 + INST_LFENCE, + INST_MASKMOVDQU, + INST_MASKMOVQ, // MMX Extensions + INST_MAXPD, + INST_MAXPS, + INST_MAXSD, + INST_MAXSS, + INST_MFENCE, + INST_MINPD, + INST_MINPS, + INST_MINSD, + INST_MINSS, + INST_MONITOR, + INST_MOV, // X86/X64 + INST_MOVAPD, + INST_MOVAPS, + INST_MOVBE, + INST_MOVD, + INST_MOVDDUP, + INST_MOVDQ2Q, + INST_MOVDQA, + INST_MOVDQU, + INST_MOVHLPS, + INST_MOVHPD, + INST_MOVHPS, + INST_MOVLHPS, + INST_MOVLPD, + INST_MOVLPS, + INST_MOVMSKPD, + INST_MOVMSKPS, + INST_MOVNTDQ, + INST_MOVNTDQA, + INST_MOVNTI, + INST_MOVNTPD, + INST_MOVNTPS, + INST_MOVNTQ, // MMX Extensions + INST_MOVQ, + INST_MOVQ2DQ, + INST_MOVSD, + INST_MOVSHDUP, + INST_MOVSLDUP, + INST_MOVSS, + INST_MOVSX, // X86/X64 + INST_MOVSXD, // X86/X64 + INST_MOVUPD, + INST_MOVUPS, + INST_MOVZX, // X86/X64 + INST_MOV_PTR, // X86/X64 + INST_MPSADBW, + INST_MUL, // X86/X64 + INST_MULPD, + INST_MULPS, + INST_MULSD, + INST_MULSS, + INST_MWAIT, + INST_NEG, // X86/X64 + INST_NOP, // X86/X64 + INST_NOT, // X86/X64 + INST_OR, // X86/X64 + INST_ORPD, + INST_ORPS, + INST_PABSB, + INST_PABSD, + INST_PABSW, + INST_PACKSSDW, + INST_PACKSSWB, + INST_PACKUSDW, + INST_PACKUSWB, + INST_PADDB, + INST_PADDD, + INST_PADDQ, + INST_PADDSB, + INST_PADDSW, + INST_PADDUSB, + INST_PADDUSW, + INST_PADDW, + INST_PALIGNR, + INST_PAND, + INST_PANDN, + INST_PAUSE, + INST_PAVGB, // 
MMX Extensions + INST_PAVGW, // MMX Extensions + INST_PBLENDVB, + INST_PBLENDW, + INST_PCMPEQB, + INST_PCMPEQD, + INST_PCMPEQQ, + INST_PCMPEQW, + INST_PCMPESTRI, + INST_PCMPESTRM, + INST_PCMPGTB, + INST_PCMPGTD, + INST_PCMPGTQ, + INST_PCMPGTW, + INST_PCMPISTRI, + INST_PCMPISTRM, + INST_PEXTRB, + INST_PEXTRD, + INST_PEXTRQ, + INST_PEXTRW, // MMX Extensions + INST_PF2ID, // 3dNow! + INST_PF2IW, // 3dNow! Extensions + INST_PFACC, // 3dNow! + INST_PFADD, // 3dNow! + INST_PFCMPEQ, // 3dNow! + INST_PFCMPGE, // 3dNow! + INST_PFCMPGT, // 3dNow! + INST_PFMAX, // 3dNow! + INST_PFMIN, // 3dNow! + INST_PFMUL, // 3dNow! + INST_PFNACC, // 3dNow! Extensions + INST_PFPNACC, // 3dNow! Extensions + INST_PFRCP, // 3dNow! + INST_PFRCPIT1, // 3dNow! + INST_PFRCPIT2, // 3dNow! + INST_PFRSQIT1, // 3dNow! + INST_PFRSQRT, // 3dNow! + INST_PFSUB, // 3dNow! + INST_PFSUBR, // 3dNow! + INST_PHADDD, + INST_PHADDSW, + INST_PHADDW, + INST_PHMINPOSUW, + INST_PHSUBD, + INST_PHSUBSW, + INST_PHSUBW, + INST_PI2FD, // 3dNow! + INST_PI2FW, // 3dNow! 
Extensions + INST_PINSRB, + INST_PINSRD, + INST_PINSRQ, + INST_PINSRW, // MMX Extensions + INST_PMADDUBSW, + INST_PMADDWD, + INST_PMAXSB, + INST_PMAXSD, + INST_PMAXSW, // MMX Extensions + INST_PMAXUB, // MMX Extensions + INST_PMAXUD, + INST_PMAXUW, + INST_PMINSB, + INST_PMINSD, + INST_PMINSW, // MMX Extensions + INST_PMINUB, // MMX Extensions + INST_PMINUD, + INST_PMINUW, + INST_PMOVMSKB, // MMX Extensions + INST_PMOVSXBD, + INST_PMOVSXBQ, + INST_PMOVSXBW, + INST_PMOVSXDQ, + INST_PMOVSXWD, + INST_PMOVSXWQ, + INST_PMOVZXBD, + INST_PMOVZXBQ, + INST_PMOVZXBW, + INST_PMOVZXDQ, + INST_PMOVZXWD, + INST_PMOVZXWQ, + INST_PMULDQ, + INST_PMULHRSW, + INST_PMULHUW, // MMX Extensions + INST_PMULHW, + INST_PMULLD, + INST_PMULLW, + INST_PMULUDQ, + INST_POP, // X86/X64 + INST_POPAD, // X86 only + INST_POPCNT, + INST_POPFD, // X86 only + INST_POPFQ, // X64 only + INST_POR, + INST_PREFETCH, // MMX Extensions + INST_PSADBW, // MMX Extensions + INST_PSHUFB, + INST_PSHUFD, + INST_PSHUFW, // MMX Extensions + INST_PSHUFHW, + INST_PSHUFLW, + INST_PSIGNB, + INST_PSIGND, + INST_PSIGNW, + INST_PSLLD, + INST_PSLLDQ, + INST_PSLLQ, + INST_PSLLW, + INST_PSRAD, + INST_PSRAW, + INST_PSRLD, + INST_PSRLDQ, + INST_PSRLQ, + INST_PSRLW, + INST_PSUBB, + INST_PSUBD, + INST_PSUBQ, + INST_PSUBSB, + INST_PSUBSW, + INST_PSUBUSB, + INST_PSUBUSW, + INST_PSUBW, + INST_PSWAPD, // 3dNow! 
Extensions + INST_PTEST, + INST_PUNPCKHBW, + INST_PUNPCKHDQ, + INST_PUNPCKHQDQ, + INST_PUNPCKHWD, + INST_PUNPCKLBW, + INST_PUNPCKLDQ, + INST_PUNPCKLQDQ, + INST_PUNPCKLWD, + INST_PUSH, // X86/X64 + INST_PUSHAD, // X86 only + INST_PUSHFD, // X86 only + INST_PUSHFQ, // X64 only + INST_PXOR, + INST_RCL, // X86/X64 + INST_RCPPS, + INST_RCPSS, + INST_RCR, // X86/X64 + INST_RDTSC, // X86/X64 + INST_RDTSCP, // X86/X64 + INST_REP_LODSB, // X86/X64 (REP) + INST_REP_LODSD, // X86/X64 (REP) + INST_REP_LODSQ, // X64 only (REP) + INST_REP_LODSW, // X86/X64 (REP) + INST_REP_MOVSB, // X86/X64 (REP) + INST_REP_MOVSD, // X86/X64 (REP) + INST_REP_MOVSQ, // X64 only (REP) + INST_REP_MOVSW, // X86/X64 (REP) + INST_REP_STOSB, // X86/X64 (REP) + INST_REP_STOSD, // X86/X64 (REP) + INST_REP_STOSQ, // X64 only (REP) + INST_REP_STOSW, // X86/X64 (REP) + INST_REPE_CMPSB, // X86/X64 (REP) + INST_REPE_CMPSD, // X86/X64 (REP) + INST_REPE_CMPSQ, // X64 only (REP) + INST_REPE_CMPSW, // X86/X64 (REP) + INST_REPE_SCASB, // X86/X64 (REP) + INST_REPE_SCASD, // X86/X64 (REP) + INST_REPE_SCASQ, // X64 only (REP) + INST_REPE_SCASW, // X86/X64 (REP) + INST_REPNE_CMPSB, // X86/X64 (REP) + INST_REPNE_CMPSD, // X86/X64 (REP) + INST_REPNE_CMPSQ, // X64 only (REP) + INST_REPNE_CMPSW, // X86/X64 (REP) + INST_REPNE_SCASB, // X86/X64 (REP) + INST_REPNE_SCASD, // X86/X64 (REP) + INST_REPNE_SCASQ, // X64 only (REP) + INST_REPNE_SCASW, // X86/X64 (REP) + INST_RET, // X86/X64 + INST_ROL, // X86/X64 + INST_ROR, // X86/X64 + INST_ROUNDPD, + INST_ROUNDPS, + INST_ROUNDSD, + INST_ROUNDSS, + INST_RSQRTPS, + INST_RSQRTSS, + INST_SAHF, // X86/X64 (CPUID NEEDED) + INST_SAL, // X86/X64 + INST_SAR, // X86/X64 + INST_SBB, // X86/X64 + INST_SET, // Begin (setcc) + INST_SETA=INST_SET, // X86/X64 (setcc) + INST_SETAE, // X86/X64 (setcc) + INST_SETB, // X86/X64 (setcc) + INST_SETBE, // X86/X64 (setcc) + INST_SETC, // X86/X64 (setcc) + INST_SETE, // X86/X64 (setcc) + INST_SETG, // X86/X64 (setcc) + INST_SETGE, // X86/X64 (setcc) + 
INST_SETL, // X86/X64 (setcc) + INST_SETLE, // X86/X64 (setcc) + INST_SETNA, // X86/X64 (setcc) + INST_SETNAE, // X86/X64 (setcc) + INST_SETNB, // X86/X64 (setcc) + INST_SETNBE, // X86/X64 (setcc) + INST_SETNC, // X86/X64 (setcc) + INST_SETNE, // X86/X64 (setcc) + INST_SETNG, // X86/X64 (setcc) + INST_SETNGE, // X86/X64 (setcc) + INST_SETNL, // X86/X64 (setcc) + INST_SETNLE, // X86/X64 (setcc) + INST_SETNO, // X86/X64 (setcc) + INST_SETNP, // X86/X64 (setcc) + INST_SETNS, // X86/X64 (setcc) + INST_SETNZ, // X86/X64 (setcc) + INST_SETO, // X86/X64 (setcc) + INST_SETP, // X86/X64 (setcc) + INST_SETPE, // X86/X64 (setcc) + INST_SETPO, // X86/X64 (setcc) + INST_SETS, // X86/X64 (setcc) + INST_SETZ, // X86/X64 (setcc) + INST_SFENCE, // MMX Extensions + INST_SHL, // X86/X64 + INST_SHLD, // X86/X64 + INST_SHR, // X86/X64 + INST_SHRD, // X86/X64 + INST_SHUFPD, + INST_SHUFPS, + INST_SQRTPD, + INST_SQRTPS, + INST_SQRTSD, + INST_SQRTSS, + INST_STC, // X86/X64 + INST_STD, // X86/X64 + INST_STMXCSR, + INST_SUB, // X86/X64 + INST_SUBPD, + INST_SUBPS, + INST_SUBSD, + INST_SUBSS, + INST_TEST, // X86/X64 + INST_UCOMISD, + INST_UCOMISS, + INST_UD2, // X86/X64 + INST_UNPCKHPD, + INST_UNPCKHPS, + INST_UNPCKLPD, + INST_UNPCKLPS, + INST_XADD, // X86/X64 (i486) + INST_XCHG, // X86/X64 (i386) + INST_XOR, // X86/X64 + INST_XORPD, + INST_XORPS, + + _INST_COUNT, + + _INST_J_BEGIN = INST_J, + _INST_J_END = INST_JMP +}; + +// ============================================================================ +// [AsmJit::Instruction Name] +// ============================================================================ + +//! @internal +//! +//! @brief Instruction names. 
+ASMJIT_API extern const char instructionName[]; + +// ============================================================================ +// [AsmJit::Instruction Description] +// ============================================================================ + +struct InstructionDescription +{ + // -------------------------------------------------------------------------- + // [Instruction Groups] + // -------------------------------------------------------------------------- + + //! @brief Instruction groups. + //! + //! This should be only used by assembler, because it's @c AsmJit::Assembler + //! specific grouping. Each group represents one 'case' in the Assembler's + //! main emit method. + enum G + { + // Gloup categories. + G_EMIT, + + G_ALU, + G_BSWAP, + G_BT, + G_CALL, + G_CRC32, + G_ENTER, + G_IMUL, + G_INC_DEC, + G_J, + G_JMP, + G_LEA, + G_M, + G_MOV, + G_MOV_PTR, + G_MOVSX_MOVZX, + G_MOVSXD, + G_PUSH, // I_PUSH is implemented before I_POP + G_POP, + G_R_RM, + G_RM_B, + G_RM, + G_RM_R, + G_REP, + G_RET, + G_ROT, + G_SHLD_SHRD, + G_TEST, + G_XCHG, + + // Group for x87 FP instructions in format mem or st(i), st(i) (fadd, fsub, fdiv, ...) + G_X87_FPU, + // Group for x87 FP instructions in format st(i), st(i) + G_X87_STI, + // Group for fld/fst/fstp instruction, internally uses I_X87_MEM group. + G_X87_MEM_STI, + // Group for x87 FP instructions that uses Word, DWord, QWord or TWord memory pointer. + G_X87_MEM, + // Group for x87 FSTSW/FNSTSW instructions + G_X87_FSTSW, + + // Group for movbe instruction + G_MOVBE, + + // Group for MMX/SSE instructions in format (X)MM|Reg|Mem <- (X)MM|Reg|Mem, + // 0x66 prefix must be set manually in opcodes. + // - Primary opcode is used for instructions in (X)MM <- (X)MM/Mem format, + // - Secondary opcode is used for instructions in (X)MM/Mem <- (X)MM format. + G_MMU_MOV, + + // Group for movd and movq instructions. 
+ G_MMU_MOVD, + G_MMU_MOVQ, + + // Group for pextrd, pextrq and pextrw instructions (it's special instruction + // not similar to others) + G_MMU_PEXTR, + + // Group for prefetch instruction + G_MMU_PREFETCH, + + // Group for MMX/SSE instructions in format (X)MM|Reg <- (X)MM|Reg|Mem|Imm, + // 0x66 prefix is added for MMX instructions that used by SSE2 registers. + // - Primary opcode is used for instructions in (X)MM|Reg <- (X)MM|Reg|Mem format, + // - Secondary opcode is iused for instructions in (X)MM|Reg <- Imm format. + G_MMU_RMI, + G_MMU_RM_IMM8, + + // Group for 3dNow instructions + G_MMU_RM_3DNOW + }; + + // -------------------------------------------------------------------------- + // [Instruction Core Flags] + // -------------------------------------------------------------------------- + + //! @brief Instruction core flags. + enum F + { + //! @brief No flags. + F_NONE = 0x00, + //! @brief Instruction is jump, conditional jump, call or ret. + F_JUMP = 0x01, + //! @brief Instruction will overwrite first operand - o[0]. + F_MOV = 0x02, + //! @brief Instruction is X87 FPU. + F_FPU = 0x04, + //! @brief Instruction can be prepended using LOCK prefix + //! (usable for multithreaded applications). + F_LOCKABLE = 0x08, + + //! @brief Instruction is special, this is for @c Compiler. + F_SPECIAL = 0x10, + //! @brief Instruction always performs memory access. + //! + //! This flag is always combined with @c F_SPECIAL and signalizes that + //! there is implicit address which is accessed (usually EDI/RDI or ESI/EDI). + F_SPECIAL_MEM = 0x20 + }; + + // -------------------------------------------------------------------------- + // [Instruction Operand Flags] + // -------------------------------------------------------------------------- + + //! @brief Instruction operand flags. 
+ enum O + { + // X86, MM, XMM + O_GB = 0x0001, + O_GW = 0x0002, + O_GD = 0x0004, + O_GQ = 0x0008, + O_MM = 0x0010, + O_XMM = 0x0020, + O_MEM = 0x0040, + O_IMM = 0x0080, + + + O_GB_MEM = O_GB | O_MEM, + O_GW_MEM = O_GW | O_MEM, + O_GD_MEM = O_GD | O_MEM, + O_GQ_MEM = O_GQ | O_MEM, + + O_GQDWB = O_GQ | O_GD | O_GW | O_GB, + O_GQDW = O_GQ | O_GD | O_GW, + O_GQD = O_GQ | O_GD, + O_GWB = O_GW | O_GB, + + O_GQDWB_MEM = O_GQDWB | O_MEM, + O_GQDW_MEM = O_GQDW | O_MEM, + O_GQD_MEM = O_GQD | O_MEM, + O_GWB_MEM = O_GWB | O_MEM, + + O_MM_MEM = O_MM | O_MEM, + O_XMM_MEM = O_XMM | O_MEM, + O_MM_XMM = O_MM | O_XMM, + O_MM_XMM_MEM = O_MM | O_XMM | O_MEM, + + // X87 + O_FM_2 = O_MEM | 0x0100, + O_FM_4 = O_MEM | 0x0200, + O_FM_8 = O_MEM | 0x0400, + O_FM_10 = O_MEM | 0x0800, + + O_FM_2_4 = O_FM_2 | O_FM_4, + O_FM_2_4_8 = O_FM_2 | O_FM_4 | O_FM_8, + O_FM_4_8 = O_FM_4 | O_FM_8, + O_FM_4_8_10 = O_FM_4 | O_FM_8 | O_FM_10, + + // Don't emit REX prefix. + O_NOREX = 0x2000 + }; + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Instruction code. + uint16_t code; + //! @brief Instruction name index in instructionName[] array. + uint16_t nameIndex; + //! @brief Instruction group, used also by @c Compiler. + uint8_t group; + //! @brief Instruction type flags. + uint8_t flags; + //! @brief First and second operand flags (some groups depends to these settings, used also by @c Compiler). + uint16_t oflags[2]; + //! @brief If instruction has only memory operand, this is register opcode. + uint16_t opCodeR; + //! @brief Primary and secondary opcodes. + uint32_t opCode[2]; + + //! @brief Get the instruction name (null terminated string). + inline const char* getName() const { return instructionName + nameIndex; } + + //! @brief Get whether the instruction is conditional or standard jump. + inline bool isJump() const { return (flags & F_JUMP) != 0; } + //! 
@brief Get whether the instruction is MOV type. + inline bool isMov() const { return (flags & F_MOV) != 0; } + //! @brief Get whether the instruction is X87 FPU type. + inline bool isFPU() const { return (flags & F_FPU) != 0; } + //! @brief Get whether the instruction can be prefixed by LOCK prefix. + inline bool isLockable() const { return (flags & F_LOCKABLE) != 0; } + + //! @brief Get whether the instruction is special type (this is used by + //! @c Compiler to manage additional variables or functionality). + inline bool isSpecial() const { return (flags & F_SPECIAL) != 0; } + //! @brief Get whether the instruction is special type and it performs + //! memory access. + inline bool isSpecialMem() const { return (flags & F_SPECIAL_MEM) != 0; } +}; + +ASMJIT_API extern const InstructionDescription instructionDescription[]; + +// ============================================================================ +// [AsmJit::EMIT_OPTIONS] +// ============================================================================ + +//! @brief Emit options, mainly for internal purposes. +enum EMIT_OPTIONS +{ + //! @brief Force REX prefix to be emitted. + //! + //! This option should be used carefully, because there are unencodable + //! combinations. If you want to access ah, bh, ch or dh registers then you + //! can't emit REX prefix and it will cause an illegal instruction error. + EMIT_OPTION_REX_PREFIX = (1 << 0), + + //! @brief Tell @c Assembler or @c Compiler to emit and validate lock prefix. + //! + //! If this option is used and instruction doesn't support LOCK prefix then + //! invalid instruction error is generated. + EMIT_OPTION_LOCK_PREFIX = (1 << 1), + + //! @brief Emit short/near jump or conditional jump instead of far one, + //! saving some bytes. 
+ EMIT_OPTION_SHORT_JUMP = (1 << 2) +}; + +// ============================================================================ +// [AsmJit::CALL_CONV] +// ============================================================================ + +//! @brief Calling convention type. +//! +//! Calling convention is scheme how function arguments are passed into +//! function and how functions returns values. In assembler programming +//! it's needed to always comply with function calling conventions, because +//! even small inconsistency can cause undefined behavior or crash. +//! +//! List of calling conventions for 32-bit x86 mode: +//! - @c CALL_CONV_CDECL - Calling convention for C runtime. +//! - @c CALL_CONV_STDCALL - Calling convention for WinAPI functions. +//! - @c CALL_CONV_MSTHISCALL - Calling convention for C++ members under +//! Windows (produced by MSVC and all MSVC compatible compilers). +//! - @c CALL_CONV_MSFASTCALL - Fastest calling convention that can be used +//! by MSVC compiler. +//! - @c CALL_CONV_BORNANDFASTCALL - Borland fastcall convention. +//! - @c CALL_CONV_GCCFASTCALL - GCC fastcall convention (2 register arguments). +//! - @c CALL_CONV_GCCREGPARM_1 - GCC regparm(1) convention. +//! - @c CALL_CONV_GCCREGPARM_2 - GCC regparm(2) convention. +//! - @c CALL_CONV_GCCREGPARM_3 - GCC regparm(3) convention. +//! +//! List of calling conventions for 64-bit x86 mode (x64): +//! - @c CALL_CONV_X64W - Windows 64-bit calling convention (WIN64 ABI). +//! - @c CALL_CONV_X64U - Unix 64-bit calling convention (AMD64 ABI). +//! +//! There is also @c CALL_CONV_DEFAULT that is defined to fit best to your +//! compiler. +//! +//! These types are used together with @c AsmJit::Compiler::newFunction() +//! method. +enum CALL_CONV +{ + //! @brief Calling convention is invalid (can't be used). + CALL_CONV_NONE = 0, + + // [X64 Calling Conventions] + + //! @brief X64 calling convention for Windows platform (WIN64 ABI). + //! + //! 
For first four arguments are used these registers: + //! - 1. 32/64-bit integer or floating point argument - rcx/xmm0 + //! - 2. 32/64-bit integer or floating point argument - rdx/xmm1 + //! - 3. 32/64-bit integer or floating point argument - r8/xmm2 + //! - 4. 32/64-bit integer or floating point argument - r9/xmm3 + //! + //! Note first four arguments here means arguments at positions from 1 to 4 + //! (included). For example if second argument is not passed by register then + //! rdx/xmm1 register is unused. + //! + //! All other arguments are pushed on the stack in right-to-left direction. + //! Stack is aligned by 16 bytes. There is 32-byte shadow space on the stack + //! that can be used to save up to four 64-bit registers (probably designed to + //! be used to save first four arguments passed in registers). + //! + //! Arguments direction: + //! - Right to Left (except for first 4 parameters that's in registers) + //! + //! Stack is cleaned by: + //! - Caller. + //! + //! Return value: + //! - Integer types - RAX register. + //! - Floating points - XMM0 register. + //! + //! Stack is always aligned by 16 bytes. + //! + //! More information about this calling convention can be found on MSDN: + //! http://msdn.microsoft.com/en-us/library/9b372w95.aspx . + CALL_CONV_X64W = 1, + + //! @brief X64 calling convention for Unix platforms (AMD64 ABI). + //! + //! First six 32 or 64-bit integer arguments are passed in rdi, rsi, rdx, + //! rcx, r8, r9 registers. First eight floating point or XMM arguments + //! are passed in xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 registers. + //! This means that in registers can be transferred up to 14 arguments total. + //! + //! There is also RED ZONE below the stack pointer that can be used for + //! temporary storage. The red zone is the space from [rsp-128] to [rsp-8]. + //! + //! Arguments direction: + //! - Right to Left (Except for arguments passed in registers). + //! + //! Stack is cleaned by: + //! - Caller. + //! 
+ //! Return value: + //! - Integer types - RAX register. + //! - Floating points - XMM0 register. + //! + //! Stack is always aligned by 16 bytes. + CALL_CONV_X64U = 2, + + // [X86 Calling Conventions] + + //! @brief Cdecl calling convention (used by C runtime). + //! + //! Compatible across MSVC and GCC. + //! + //! Arguments direction: + //! - Right to Left + //! + //! Stack is cleaned by: + //! - Caller. + CALL_CONV_CDECL = 3, + + //! @brief Stdcall calling convention (used by WinAPI). + //! + //! Compatible across MSVC and GCC. + //! + //! Arguments direction: + //! - Right to Left + //! + //! Stack is cleaned by: + //! - Callee. + //! + //! Return value: + //! - Integer types - EAX:EDX registers. + //! - Floating points - st(0) register. + CALL_CONV_STDCALL = 4, + + //! @brief MSVC specific calling convention used by MSVC/Intel compilers + //! for struct/class methods. + //! + //! This is MSVC (and Intel) only calling convention used in Windows + //! world for C++ class methods. Implicit 'this' pointer is stored in + //! ECX register instead of storing it on the stack. + //! + //! Arguments direction: + //! - Right to Left (except this pointer in ECX) + //! + //! Stack is cleaned by: + //! - Callee. + //! + //! Return value: + //! - Integer types - EAX:EDX registers. + //! - Floating points - st(0) register. + //! + //! C++ class methods that have variable count of arguments uses different + //! calling convention called cdecl. + //! + //! @note This calling convention is always used by MSVC for class methods, + //! it's implicit and there is no way how to override it. + CALL_CONV_MSTHISCALL = 5, + + //! @brief MSVC specific fastcall. + //! + //! Two first parameters (evaluated from left-to-right) are in ECX:EDX + //! registers, all others on the stack in right-to-left order. + //! + //! Arguments direction: + //! - Right to Left (except to first two integer arguments in ECX:EDX) + //! + //! Stack is cleaned by: + //! - Callee. + //! + //! Return value: + //! 
- Integer types - EAX:EDX registers. + //! - Floating points - st(0) register. + //! + //! @note This calling convention differs to GCC one in stack cleaning + //! mechanism. + CALL_CONV_MSFASTCALL = 6, + + //! @brief Borland specific fastcall with 2 parameters in registers. + //! + //! Two first parameters (evaluated from left-to-right) are in ECX:EDX + //! registers, all others on the stack in left-to-right order. + //! + //! Arguments direction: + //! - Left to Right (except to first two integer arguments in ECX:EDX) + //! + //! Stack is cleaned by: + //! - Callee. + //! + //! Return value: + //! - Integer types - EAX:EDX registers. + //! - Floating points - st(0) register. + //! + //! @note Arguments on the stack are in left-to-right order that differs + //! to other fastcall conventions used in different compilers. + CALL_CONV_BORLANDFASTCALL = 7, + + //! @brief GCC specific fastcall convention. + //! + //! Two first parameters (evaluated from left-to-right) are in ECX:EDX + //! registers, all others on the stack in right-to-left order. + //! + //! Arguments direction: + //! - Right to Left (except to first two integer arguments in ECX:EDX) + //! + //! Stack is cleaned by: + //! - Callee. + //! + //! Return value: + //! - Integer types - EAX:EDX registers. + //! - Floating points - st(0) register. + //! + //! @note This calling convention should be compatible to + //! @c CALL_CONV_MSFASTCALL. + CALL_CONV_GCCFASTCALL = 8, + + //! @brief GCC specific regparm(1) convention. + //! + //! The first parameter (evaluated from left-to-right) is in EAX register, + //! all others on the stack in right-to-left order. + //! + //! Arguments direction: + //! - Right to Left (except to first one integer argument in EAX) + //! + //! Stack is cleaned by: + //! - Caller. + //! + //! Return value: + //! - Integer types - EAX:EDX registers. + //! - Floating points - st(0) register. + CALL_CONV_GCCREGPARM_1 = 9, + + //! @brief GCC specific regparm(2) convention. + //! + //! 
Two first parameters (evaluated from left-to-right) are in EAX:EDX + //! registers, all others on the stack in right-to-left order. + //! + //! Arguments direction: + //! - Right to Left (except to first two integer arguments in EAX:EDX) + //! + //! Stack is cleaned by: + //! - Caller. + //! + //! Return value: + //! - Integer types - EAX:EDX registers. + //! - Floating points - st(0) register. + CALL_CONV_GCCREGPARM_2 = 10, + + //! @brief GCC specific fastcall with 3 parameters in registers. + //! + //! Three first parameters (evaluated from left-to-right) are in + //! EAX:EDX:ECX registers, all others on the stack in right-to-left order. + //! + //! Arguments direction: + //! - Right to Left (except to first three integer arguments in EAX:EDX:ECX) + //! + //! Stack is cleaned by: + //! - Caller. + //! + //! Return value: + //! - Integer types - EAX:EDX registers. + //! - Floating points - st(0) register. + CALL_CONV_GCCREGPARM_3 = 11, + + // [Preferred Calling Convention] + + //! @def CALL_CONV_DEFAULT + //! @brief Default calling convention for current platform / operating system. + +#if defined(ASMJIT_X86) + + CALL_CONV_DEFAULT = CALL_CONV_CDECL, + +# if defined(_MSC_VER) + CALL_CONV_COMPAT_FASTCALL = CALL_CONV_MSFASTCALL, +# elif defined(__GNUC__) + CALL_CONV_COMPAT_FASTCALL = CALL_CONV_GCCFASTCALL, +# elif defined(__BORLANDC__) + CALL_CONV_COMPAT_FASTCALL = CALL_CONV_BORLANDFASTCALL, +# else +# error "AsmJit::CALL_CONV_COMPATIBLE_FASTCALL_2 - Unsupported." 
+# endif + + CALL_CONV_COMPAT_STDCALL = CALL_CONV_STDCALL, + CALL_CONV_COMPAT_CDECL = CALL_CONV_CDECL + +#else + +# if defined(ASMJIT_WINDOWS) + CALL_CONV_DEFAULT = CALL_CONV_X64W, +# else + CALL_CONV_DEFAULT = CALL_CONV_X64U, +# endif + + CALL_CONV_COMPAT_FASTCALL = CALL_CONV_DEFAULT, + CALL_CONV_COMPAT_STDCALL = CALL_CONV_DEFAULT, + CALL_CONV_COMPAT_CDECL = CALL_CONV_DEFAULT + +#endif // ASMJIT_X86 +}; + +// ============================================================================ +// [AsmJit::VARIABLE_TYPE] +// ============================================================================ + +//! @brief Variable type. +enum VARIABLE_TYPE +{ + // -------------------------------------------------------------------------- + // [Platform Dependent] + // -------------------------------------------------------------------------- + + //! @brief Variable is 32-bit general purpose register. + VARIABLE_TYPE_GPD = 0, + //! @brief Variable is 64-bit general purpose register. + VARIABLE_TYPE_GPQ = 1, + + //! @var VARIABLE_TYPE_GPN + //! @brief Variable is system wide general purpose register (32-bit or 64-bit). +#if defined(ASMJIT_X86) + VARIABLE_TYPE_GPN = VARIABLE_TYPE_GPD, +#else + VARIABLE_TYPE_GPN = VARIABLE_TYPE_GPQ, +#endif + + //! @brief Variable is X87 (FPU). + VARIABLE_TYPE_X87 = 2, + + //! @brief Variable is X87 (FPU) SP-FP number (float). + VARIABLE_TYPE_X87_1F = 3, + + //! @brief Variable is X87 (FPU) DP-FP number (double). + VARIABLE_TYPE_X87_1D = 4, + + //! @brief Variable is MM register / memory location. + VARIABLE_TYPE_MM = 5, + + //! @brief Variable is XMM register / memory location. + VARIABLE_TYPE_XMM = 6, + + //! @brief Variable is SSE scalar SP-FP number. + VARIABLE_TYPE_XMM_1F = 7, + //! @brief Variable is SSE packed SP-FP number (4 floats). + VARIABLE_TYPE_XMM_4F = 8, + + //! @brief Variable is SSE2 scalar DP-FP number. + VARIABLE_TYPE_XMM_1D = 9, + //! @brief Variable is SSE2 packed DP-FP number (2 doubles). + VARIABLE_TYPE_XMM_2D = 10, + + //! 
@brief Count of variable types. + _VARIABLE_TYPE_COUNT = 11, + + // -------------------------------------------------------------------------- + // [Platform Independent] + // -------------------------------------------------------------------------- + + //! @brief Variable is 32-bit integer. + VARIABLE_TYPE_INT32 = VARIABLE_TYPE_GPD, + //! @brief Variable is 64-bit integer. + VARIABLE_TYPE_INT64 = VARIABLE_TYPE_GPQ, + //! @brief Variable is system dependent integer / pointer. + VARIABLE_TYPE_INTPTR = VARIABLE_TYPE_GPN, + +#if !defined(ASMJIT_NODOC) +#if defined(ASMJIT_X86) + VARIABLE_TYPE_FLOAT = VARIABLE_TYPE_X87_1F, + VARIABLE_TYPE_DOUBLE = VARIABLE_TYPE_X87_1D +#else + VARIABLE_TYPE_FLOAT = VARIABLE_TYPE_XMM_1F, + VARIABLE_TYPE_DOUBLE = VARIABLE_TYPE_XMM_1D +#endif +#else + //! @brief Variable is SP-FP (x87 or xmm). + VARIABLE_TYPE_FLOAT = XXX, + //! @brief Variable is DP-FP (x87 or xmm). + VARIABLE_TYPE_DOUBLE = XXX +#endif +}; + +// ============================================================================ +// [AsmJit::VARIABLE_HINT] +// ============================================================================ + +//! @brief Variable hint (used by @ref Compiler). +//! +//! @sa @ref Compiler. +enum VARIABLE_HINT +{ + //! @brief Alloc variable. + VARIABLE_HINT_ALLOC = 0, + //! @brief Spill variable. + VARIABLE_HINT_SPILL = 1, + //! @brief Save variable if modified. + VARIABLE_HINT_SAVE = 2, + //! @brief Save variable if modified and mark it as unused. + VARIABLE_HINT_SAVE_AND_UNUSE = 3, + //! @brief Mark variable as unused. + VARIABLE_HINT_UNUSE = 4 +}; + +//! @} + +} // AsmJit namespace + +// [Api-End] +#include "ApiEnd.h" + +// [Guard] +#endif // _ASMJIT_DEFSX86X64_H diff --git a/lib/AsmJit/Logger.cpp b/lib/AsmJit/Logger.cpp new file mode 100644 index 0000000..d32487b --- /dev/null +++ b/lib/AsmJit/Logger.cpp @@ -0,0 +1,111 @@ +// AsmJit - Complete JIT Assembler for C++ Language. 
+ +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// We are using sprintf() here. +#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS) +#define _CRT_SECURE_NO_WARNINGS +#endif // _MSC_VER + +// [Dependencies] +#include "Logger.h" + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +// ============================================================================ +// [AsmJit::Logger] +// ============================================================================ + +Logger::Logger() ASMJIT_NOTHROW : + _enabled(true), + _used(true), + _logBinary(false) +{ +} + +Logger::~Logger() ASMJIT_NOTHROW +{ +} + +void Logger::logFormat(const char* fmt, ...) 
ASMJIT_NOTHROW +{ + char buf[1024]; + sysuint_t len; + + va_list ap; + va_start(ap, fmt); + len = vsnprintf(buf, 1023, fmt, ap); + va_end(ap); + + logString(buf, len); +} + +void Logger::setEnabled(bool enabled) ASMJIT_NOTHROW +{ + _enabled = enabled; + _used = enabled; +} + +// ============================================================================ +// [AsmJit::FileLogger] +// ============================================================================ + +FileLogger::FileLogger(FILE* stream) ASMJIT_NOTHROW + : _stream(NULL) +{ + setStream(stream); +} + +void FileLogger::logString(const char* buf, sysuint_t len) ASMJIT_NOTHROW +{ + if (!_used) return; + + if (len == (sysuint_t)-1) len = strlen(buf); + fwrite(buf, 1, len, _stream); +} + +void FileLogger::setEnabled(bool enabled) ASMJIT_NOTHROW +{ + _enabled = enabled; + _used = (_enabled == true) & (_stream != NULL); +} + +//! @brief Set file stream. +void FileLogger::setStream(FILE* stream) ASMJIT_NOTHROW +{ + _stream = stream; + _used = (_enabled == true) & (_stream != NULL); +} + +} // AsmJit namespace + +// [Api-End] +#include "ApiEnd.h" diff --git a/lib/AsmJit/Logger.h b/lib/AsmJit/Logger.h new file mode 100644 index 0000000..95920b0 --- /dev/null +++ b/lib/AsmJit/Logger.h @@ -0,0 +1,194 @@ +// AsmJit - Complete JIT Assembler for C++ Language. 
+ +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Guard] +#ifndef _ASMJIT_LOGGER_H +#define _ASMJIT_LOGGER_H + +// [Dependencies] +#include "Defs.h" + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +//! @addtogroup AsmJit_Logging +//! @{ + +//! @brief Abstract logging class. +//! +//! This class can be inherited and reimplemented to fit into your logging +//! subsystem. When reimplementing use @c AsmJit::Logger::log() method to +//! log into your stream. +//! +//! This class also contain @c _enabled member that can be used to enable +//! or disable logging. 
+struct ASMJIT_API Logger +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create logger. + Logger() ASMJIT_NOTHROW; + //! @brief Destroy logger. + virtual ~Logger() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Logging] + // -------------------------------------------------------------------------- + + //! @brief Abstract method to log output. + //! + //! Default implementation that is in @c AsmJit::Logger is to do nothing. + //! It's virtual to fit to your logging system. + virtual void logString(const char* buf, sysuint_t len = (sysuint_t)-1) ASMJIT_NOTHROW = 0; + + //! @brief Log formatter message (like sprintf) sending output to @c logString() method. + virtual void logFormat(const char* fmt, ...) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Enabled] + // -------------------------------------------------------------------------- + + //! @brief Return @c true if logging is enabled. + inline bool isEnabled() const ASMJIT_NOTHROW { return _enabled; } + + //! @brief Set logging to enabled or disabled. + virtual void setEnabled(bool enabled) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Used] + // -------------------------------------------------------------------------- + + //! @brief Get whether the logger should be used. + inline bool isUsed() const ASMJIT_NOTHROW { return _used; } + + // -------------------------------------------------------------------------- + // [LogBinary] + // -------------------------------------------------------------------------- + + //! @brief Get whether logging binary output. + inline bool getLogBinary() const { return _logBinary; } + //! @brief Get whether to log binary output. 
+ inline void setLogBinary(bool val) { _logBinary = val; } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +protected: + + //! @brief Whether logger is enabled or disabled. + //! + //! Default @c true. + bool _enabled; + + //! @brief Whether logger is enabled and can be used. + //! + //! This value can be set by inherited classes to inform @c Logger that + //! assigned stream (or something that can log output) is invalid. If + //! @c _used is false it means that there is no logging output and AsmJit + //! shouldn't use this logger (because all messages will be lost). + //! + //! This is designed only to optimize cases that logger exists, but its + //! configured not to output messages. The API inside Logging and AsmJit + //! should only check this value when needed. The API outside AsmJit should + //! check only whether logging is @c _enabled. + //! + //! Default @c true. + bool _used; + + //! @brief Whether to log instruction in binary form. + bool _logBinary; + +private: + ASMJIT_DISABLE_COPY(Logger) +}; + +//! @brief Logger that can log to standard C @c FILE* stream. +struct ASMJIT_API FileLogger : public Logger +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create new @c FileLogger. + //! @param stream FILE stream where logging will be sent (can be @c NULL + //! to disable logging). 
+ FileLogger(FILE* stream = NULL) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Logging] + // -------------------------------------------------------------------------- + + virtual void logString(const char* buf, sysuint_t len = (sysuint_t)-1) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Enabled] + // -------------------------------------------------------------------------- + + virtual void setEnabled(bool enabled) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Stream] + // -------------------------------------------------------------------------- + + //! @brief Get @c FILE* stream. + //! + //! @note Return value can be @c NULL. + inline FILE* getStream() const ASMJIT_NOTHROW { return _stream; } + + //! @brief Set @c FILE* stream. + //! + //! @param stream @c FILE stream where to log output (can be @c NULL to + //! disable logging). + void setStream(FILE* stream) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +protected: + //! @brief C file stream. + FILE* _stream; + + ASMJIT_DISABLE_COPY(FileLogger) +}; + +//! @} + +} // AsmJit namespace + +// [Api-End] +#include "ApiEnd.h" + +// [Guard] +#endif // _ASMJIT_LOGGER_H diff --git a/lib/AsmJit/MemoryManager.cpp b/lib/AsmJit/MemoryManager.cpp new file mode 100644 index 0000000..cb6ffa9 --- /dev/null +++ b/lib/AsmJit/MemoryManager.cpp @@ -0,0 +1,1227 @@ +// AsmJit - Complete JIT Assembler for C++ Language. 
+
+// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com>
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+// [Dependencies]
+#include "Build.h"
+#include "MemoryManager.h"
+#include "Platform.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <new>
+
+// [Api-Begin]
+#include "ApiBegin.h"
+
+// This file contains the implementation of virtual memory management for the
+// AsmJit library. The initial concept is to keep this implementation simple
+// but efficient. There were several reasons why I decided to write the
+// implementation myself.
+//
+// Goals:
+// - We usually need to allocate blocks of 64 bytes and more.
+// - Alignment of allocated blocks is large - 32 bytes or 64 bytes.
+// - Keep memory manager information outside allocated virtual memory pages
+//   (these pages allow execution of code).
+// - Keep implementation small.
+//
+// The implementation is not small and probably not very readable, so here
+// is some know-how.
+//
+// - The implementation is based on bit arrays and binary trees. Bit arrays
+//   contain information about allocated and unused blocks of memory. The
+//   block size is described by the MemNode::density member. The count of
+//   blocks is stored in the MemNode::blocks member. For example if the
+//   density is 64 and the count of blocks is 20, the memory node contains
+//   64*20 bytes of memory and the smallest possible allocation (and also the
+//   alignment) is 64 bytes. So the density also describes the memory
+//   alignment. Binary trees are used to enable a fast lookup of all
+//   addresses allocated by the memory manager instance. This is used mainly
+//   in MemoryManagerPrivate::free().
+//
+// The bit array looks like this (empty = unused, X = used) - block size 64
+// -------------------------------------------------------------------------
+// | |X|X| | | | | |X|X|X|X|X|X| | | | | | | | | | | | |X| | | | |X|X|X| | |
+// -------------------------------------------------------------------------
+// The bit array shows that there are 12 allocated blocks of 64 bytes, so the
+// total allocated size is 768 bytes. The maximum count of continuous free
+// blocks is 12 (see the largest gap).
+ +namespace AsmJit { + +// ============================================================================ +// [Bits Manipulation] +// ============================================================================ + +#define BITS_PER_ENTITY (sizeof(sysuint_t) * 8) + +static void _SetBit(sysuint_t* buf, sysuint_t index) ASMJIT_NOTHROW +{ + sysuint_t i = index / BITS_PER_ENTITY; // sysuint_t[] + sysuint_t j = index % BITS_PER_ENTITY; // sysuint_t[][] bit index + + buf += i; + *buf |= (sysuint_t)1 << j; +} + +static void _ClearBit(sysuint_t* buf, sysuint_t index) ASMJIT_NOTHROW +{ + sysuint_t i = index / BITS_PER_ENTITY; // sysuint_t[] + sysuint_t j = index % BITS_PER_ENTITY; // sysuint_t[][] bit index + + buf += i; + *buf &= ~((sysuint_t)1 << j); +} + +static void _SetBits(sysuint_t* buf, sysuint_t index, sysuint_t len) ASMJIT_NOTHROW +{ + if (len == 0) return; + + sysuint_t i = index / BITS_PER_ENTITY; // sysuint_t[] + sysuint_t j = index % BITS_PER_ENTITY; // sysuint_t[][] bit index + + // How many bytes process in the first group. + sysuint_t c = BITS_PER_ENTITY - j; + if (c > len) c = len; + + // Offset. + buf += i; + + *buf++ |= (((sysuint_t)-1) >> (BITS_PER_ENTITY - c)) << j; + len -= c; + + while (len >= BITS_PER_ENTITY) + { + *buf++ = (sysuint_t)-1; + len -= BITS_PER_ENTITY; + } + + if (len) + { + *buf |= (((sysuint_t)-1) >> (BITS_PER_ENTITY - len)); + } +} + +static void _ClearBits(sysuint_t* buf, sysuint_t index, sysuint_t len) ASMJIT_NOTHROW +{ + if (len == 0) return; + + sysuint_t i = index / BITS_PER_ENTITY; // sysuint_t[] + sysuint_t j = index % BITS_PER_ENTITY; // sysuint_t[][] bit index + + // How many bytes process in the first group. + sysuint_t c = BITS_PER_ENTITY - j; + if (c > len) c = len; + + // Offset. 
+ buf += i; + + *buf++ &= ~((((sysuint_t)-1) >> (BITS_PER_ENTITY - c)) << j); + len -= c; + + while (len >= BITS_PER_ENTITY) + { + *buf++ = 0; + len -= BITS_PER_ENTITY; + } + + if (len) + { + *buf &= ((sysuint_t)-1) << len; + } +} + +// ============================================================================ +// [AsmJit::MemNode] +// ============================================================================ + +#define M_DIV(x, y) ((x) / (y)) +#define M_MOD(x, y) ((x) % (y)) + +template<typename T> +struct ASMJIT_HIDDEN RbNode +{ + // -------------------------------------------------------------------------- + // [Node red-black tree tree, key is mem pointer]. + // -------------------------------------------------------------------------- + + // Implementation is based on article by Julienne Walker (Public Domain), + // including C code and original comments. Thanks for the excellent article. + + // Left[0] and right[1] nodes. + T* node[2]; + // Whether the node is RED. + uint32_t red; + + // -------------------------------------------------------------------------- + // [Chunk Memory] + // -------------------------------------------------------------------------- + + // Virtual memory address. + uint8_t* mem; +}; + +// Get whether the node is red (NULL or node with red flag). +template<typename T> +inline bool isRed(RbNode<T>* node) +{ + return node != NULL && node->red; +} + +struct ASMJIT_HIDDEN MemNode : public RbNode<MemNode> +{ + // -------------------------------------------------------------------------- + // [Node double-linked list] + // -------------------------------------------------------------------------- + + MemNode* prev; // Prev node in list. + MemNode* next; // Next node in list. + + // -------------------------------------------------------------------------- + // [Chunk Data] + // -------------------------------------------------------------------------- + + sysuint_t size; // How many bytes contain this node. 
+ sysuint_t blocks; // How many blocks are here. + sysuint_t density; // Minimum count of allocated bytes in this node (also alignment). + sysuint_t used; // How many bytes are used in this node. + sysuint_t largestBlock; // Contains largest block that can be allocated. + + sysuint_t* baUsed; // Contains bits about used blocks. + // (0 = unused, 1 = used). + sysuint_t* baCont; // Contains bits about continuous blocks. + // (0 = stop, 1 = continue). + + // -------------------------------------------------------------------------- + // [Methods] + // -------------------------------------------------------------------------- + + // Get available space. + inline sysuint_t getAvailable() const ASMJIT_NOTHROW { return size - used; } + + inline void fillData(MemNode* other) + { + mem = other->mem; + + size = other->size; + blocks = other->blocks; + density = other->density; + used = other->used; + largestBlock = other->largestBlock; + baUsed = other->baUsed; + baCont = other->baCont; + } +}; + +// ============================================================================ +// [AsmJit::M_Permanent] +// ============================================================================ + +//! @brief Permanent node. +struct ASMJIT_HIDDEN PermanentNode +{ + uint8_t* mem; // Base pointer (virtual memory address). + sysuint_t size; // Count of bytes allocated. + sysuint_t used; // Count of bytes used. + PermanentNode* prev; // Pointer to prev chunk or NULL. + + // Get available space. 
+ inline sysuint_t getAvailable() const ASMJIT_NOTHROW { return size - used; } +}; + +// ============================================================================ +// [AsmJit::MemoryManagerPrivate] +// ============================================================================ + +struct ASMJIT_HIDDEN MemoryManagerPrivate +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + +#if !defined(ASMJIT_WINDOWS) + MemoryManagerPrivate() ASMJIT_NOTHROW; +#else + MemoryManagerPrivate(HANDLE hProcess) ASMJIT_NOTHROW; +#endif // ASMJIT_WINDOWS + ~MemoryManagerPrivate() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Allocation] + // -------------------------------------------------------------------------- + + MemNode* createNode(sysuint_t size, sysuint_t density) ASMJIT_NOTHROW; + + void* allocPermanent(sysuint_t vsize) ASMJIT_NOTHROW; + void* allocFreeable(sysuint_t vsize) ASMJIT_NOTHROW; + + bool free(void* address) ASMJIT_NOTHROW; + bool shrink(void* address, sysuint_t used) ASMJIT_NOTHROW; + void freeAll(bool keepVirtualMemory) ASMJIT_NOTHROW; + + // Helpers to avoid ifdefs in the code. 
+ inline uint8_t* allocVirtualMemory(sysuint_t size, sysuint_t* vsize) ASMJIT_NOTHROW + { +#if !defined(ASMJIT_WINDOWS) + return (uint8_t*)VirtualMemory::alloc(size, vsize, true); +#else + return (uint8_t*)VirtualMemory::allocProcessMemory(_hProcess, size, vsize, true); +#endif + } + + inline void freeVirtualMemory(void* vmem, sysuint_t vsize) ASMJIT_NOTHROW + { +#if !defined(ASMJIT_WINDOWS) + VirtualMemory::free(vmem, vsize); +#else + VirtualMemory::freeProcessMemory(_hProcess, vmem, vsize); +#endif + } + + // -------------------------------------------------------------------------- + // [NodeList RB-Tree] + // -------------------------------------------------------------------------- + + bool checkTree() ASMJIT_NOTHROW; + + void insertNode(MemNode* node) ASMJIT_NOTHROW; + MemNode* removeNode(MemNode* node) ASMJIT_NOTHROW; + MemNode* findPtr(uint8_t* mem) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +#if defined(ASMJIT_WINDOWS) + HANDLE _hProcess; // Process where to allocate memory. +#endif // ASMJIT_WINDOWS + Lock _lock; // Lock for thread safety. + + sysuint_t _newChunkSize; // Default node size. + sysuint_t _newChunkDensity; // Default node density. + sysuint_t _allocated; // How many bytes are allocated. + sysuint_t _used; // How many bytes are used. + + // Memory nodes list. + MemNode* _first; + MemNode* _last; + MemNode* _optimal; + + // Memory nodes tree. + MemNode* _root; + + // Permanent memory. + PermanentNode* _permanent; + + // Whether to keep virtual memory after destroy. 
+ bool _keepVirtualMemory; +}; + +// ============================================================================ +// [AsmJit::MemoryManagerPrivate - Construction / Destruction] +// ============================================================================ + +#if !defined(ASMJIT_WINDOWS) +MemoryManagerPrivate::MemoryManagerPrivate() ASMJIT_NOTHROW : +#else +MemoryManagerPrivate::MemoryManagerPrivate(HANDLE hProcess) ASMJIT_NOTHROW : + _hProcess(hProcess), +#endif + _newChunkSize(65536), + _newChunkDensity(64), + _allocated(0), + _used(0), + _root(NULL), + _first(NULL), + _last(NULL), + _optimal(NULL), + _permanent(NULL), + _keepVirtualMemory(false) +{ +} + +MemoryManagerPrivate::~MemoryManagerPrivate() ASMJIT_NOTHROW +{ + // Freeable memory cleanup - Also frees the virtual memory if configured to. + freeAll(_keepVirtualMemory); + + // Permanent memory cleanup - Never frees the virtual memory. + PermanentNode* node = _permanent; + while (node) + { + PermanentNode* prev = node->prev; + ASMJIT_FREE(node); + node = prev; + } +} + +// ============================================================================ +// [AsmJit::MemoryManagerPrivate - Allocation] +// ============================================================================ + +// Allocates virtual memory node and MemNode structure. +// +// Returns MemNode* on success, otherwise NULL. +MemNode* MemoryManagerPrivate::createNode(sysuint_t size, sysuint_t density) ASMJIT_NOTHROW +{ + sysuint_t vsize; + uint8_t* vmem = allocVirtualMemory(size, &vsize); + + // Out of memory. + if (vmem == NULL) return NULL; + + sysuint_t blocks = (vsize / density); + sysuint_t bsize = (((blocks + 7) >> 3) + sizeof(sysuint_t) - 1) & ~(sysuint_t)(sizeof(sysuint_t)-1); + + MemNode* node = reinterpret_cast<MemNode*>(ASMJIT_MALLOC(sizeof(MemNode))); + uint8_t* data = reinterpret_cast<uint8_t*>(ASMJIT_MALLOC(bsize * 2)); + + // Out of memory. 
+ if (node == NULL || data == NULL) + { + freeVirtualMemory(vmem, vsize); + if (node) ASMJIT_FREE(node); + if (data) ASMJIT_FREE(data); + return NULL; + } + + // Initialize RbNode data. + node->node[0] = NULL; + node->node[1] = NULL; + node->red = 1; + node->mem = vmem; + + // Initialize MemNode data. + node->prev = NULL; + node->next = NULL; + + node->size = vsize; + node->blocks = blocks; + node->density = density; + node->used = 0; + node->largestBlock = vsize; + + memset(data, 0, bsize * 2); + node->baUsed = reinterpret_cast<sysuint_t*>(data); + node->baCont = reinterpret_cast<sysuint_t*>(data + bsize); + + return node; +} + +void* MemoryManagerPrivate::allocPermanent(sysuint_t vsize) ASMJIT_NOTHROW +{ + static const sysuint_t permanentAlignment = 32; + static const sysuint_t permanentNodeSize = 32768; + + sysuint_t over = vsize % permanentAlignment; + if (over) over = permanentAlignment - over; + sysuint_t alignedSize = vsize + over; + + AutoLock locked(_lock); + + PermanentNode* node = _permanent; + + // Try to find space in allocated chunks. + while (node && alignedSize > node->getAvailable()) node = node->prev; + + // Or allocate new node. + if (!node) + { + sysuint_t nodeSize = permanentNodeSize; + if (vsize > nodeSize) nodeSize = vsize; + + node = (PermanentNode*)ASMJIT_MALLOC(sizeof(PermanentNode)); + // Out of memory. + if (node == NULL) return NULL; + + node->mem = allocVirtualMemory(nodeSize, &node->size); + // Out of memory. + if (node->mem == NULL) + { + ASMJIT_FREE(node); + return NULL; + } + + node->used = 0; + node->prev = _permanent; + _permanent = node; + } + + // Finally, copy function code to our space we reserved for. + uint8_t* result = node->mem + node->used; + + // Update Statistics. + node->used += alignedSize; + _used += alignedSize; + + // Code can be null to only reserve space for code. + return (void*)result; +} + +void* MemoryManagerPrivate::allocFreeable(sysuint_t vsize) ASMJIT_NOTHROW +{ + sysuint_t i; // Current index. 
+ sysuint_t need; // How many we need to be freed. + sysuint_t minVSize; + + // Align to 32 bytes (our default alignment). + vsize = (vsize + 31) & ~(sysuint_t)31; + if (vsize == 0) return NULL; + + AutoLock locked(_lock); + MemNode* node = _optimal; + + minVSize = _newChunkSize; + + // Try to find memory block in existing nodes. + while (node) + { + // Skip this node? + if ((node->getAvailable() < vsize) || + (node->largestBlock < vsize && node->largestBlock != 0)) + { + MemNode* next = node->next; + if (node->getAvailable() < minVSize && node == _optimal && next) _optimal = next; + node = next; + continue; + } + + sysuint_t* up = node->baUsed; // Current ubits address. + sysuint_t ubits; // Current ubits[0] value. + sysuint_t bit; // Current bit mask. + sysuint_t blocks = node->blocks; // Count of blocks in node. + sysuint_t cont = 0; // How many bits are currently freed in find loop. + sysuint_t maxCont = 0; // Largest continuous block (bits count). + sysuint_t j; + + need = M_DIV((vsize + node->density - 1), node->density); + i = 0; + + // Try to find node that is large enough. + while (i < blocks) + { + ubits = *up++; + + // Fast skip used blocks. + if (ubits == (sysuint_t)-1) + { + if (cont > maxCont) maxCont = cont; + cont = 0; + + i += BITS_PER_ENTITY; + continue; + } + + sysuint_t max = BITS_PER_ENTITY; + if (i + max > blocks) max = blocks - i; + + for (j = 0, bit = 1; j < max; bit <<= 1) + { + j++; + if ((ubits & bit) == 0) + { + if (++cont == need) { i += j; i -= cont; goto found; } + continue; + } + + if (cont > maxCont) maxCont = cont; + cont = 0; + } + + i += BITS_PER_ENTITY; + } + + // Because we traversed entire node, we can set largest node size that + // will be used to cache next traversing.. + node->largestBlock = maxCont * node->density; + + node = node->next; + } + + // If we are here, we failed to find existing memory block and we must + // allocate new. 
+ { + sysuint_t chunkSize = _newChunkSize; + if (chunkSize < vsize) chunkSize = vsize; + + node = createNode(chunkSize, _newChunkDensity); + if (node == NULL) return NULL; + + // Update binary tree. + insertNode(node); + ASMJIT_ASSERT(checkTree()); + + // Alloc first node at start. + i = 0; + need = (vsize + node->density - 1) / node->density; + + // Update statistics. + _allocated += node->size; + } + +found: + // Update bits. + _SetBits(node->baUsed, i, need); + _SetBits(node->baCont, i, need - 1); + + // Update statistics. + { + sysuint_t u = need * node->density; + node->used += u; + node->largestBlock = 0; + _used += u; + } + + // And return pointer to allocated memory. + uint8_t* result = node->mem + i * node->density; + ASMJIT_ASSERT(result >= node->mem && result <= node->mem + node->size - vsize); + return result; +} + +bool MemoryManagerPrivate::free(void* address) ASMJIT_NOTHROW +{ + if (address == NULL) return true; + + AutoLock locked(_lock); + + MemNode* node = findPtr((uint8_t*)address); + if (node == NULL) + return false; + + sysuint_t offset = (sysuint_t)((uint8_t*)address - (uint8_t*)node->mem); + sysuint_t bitpos = M_DIV(offset, node->density); + sysuint_t i = (bitpos / BITS_PER_ENTITY); + + sysuint_t* up = node->baUsed + i; // Current ubits address. + sysuint_t* cp = node->baCont + i; // Current cbits address. + sysuint_t ubits = *up; // Current ubits[0] value. + sysuint_t cbits = *cp; // Current cbits[0] value. + sysuint_t bit = (sysuint_t)1 << (bitpos % BITS_PER_ENTITY); + + sysuint_t cont = 0; + bool stop; + + for (;;) + { + stop = (cbits & bit) == 0; + ubits &= ~bit; + cbits &= ~bit; + + bit <<= 1; + cont++; + + if (stop || bit == 0) + { + *up = ubits; + *cp = cbits; + if (stop) break; + + ubits = *++up; + cbits = *++cp; + bit = 1; + } + } + + // If the freed block is fully allocated node then it's needed to + // update 'optimal' pointer in memory manager. 
+ if (node->used == node->size) + { + MemNode* cur = _optimal; + + do { + cur = cur->prev; + if (cur == node) { _optimal = node; break; } + } while (cur); + } + + // Statistics. + cont *= node->density; + if (node->largestBlock < cont) node->largestBlock = cont; + node->used -= cont; + _used -= cont; + + // If page is empty, we can free it. + if (node->used == 0) + { + // Free memory associated with node (this memory is not accessed + // anymore so it's safe). + freeVirtualMemory(node->mem, node->size); + ASMJIT_FREE(node->baUsed); + + node->baUsed = NULL; + node->baCont = NULL; + + // Statistics. + _allocated -= node->size; + + // Remove node. This function can return different node than + // passed into, but data is copied into previous node if needed. + ASMJIT_FREE(removeNode(node)); + ASMJIT_ASSERT(checkTree()); + } + + return true; +} + +bool MemoryManagerPrivate::shrink(void* address, sysuint_t used) ASMJIT_NOTHROW +{ + if (address == NULL) return false; + if (used == 0) return free(address); + + AutoLock locked(_lock); + + MemNode* node = findPtr((uint8_t*)address); + if (node == NULL) + return false; + + sysuint_t offset = (sysuint_t)((uint8_t*)address - (uint8_t*)node->mem); + sysuint_t bitpos = M_DIV(offset, node->density); + sysuint_t i = (bitpos / BITS_PER_ENTITY); + + sysuint_t* up = node->baUsed + i; // Current ubits address. + sysuint_t* cp = node->baCont + i; // Current cbits address. + sysuint_t ubits = *up; // Current ubits[0] value. + sysuint_t cbits = *cp; // Current cbits[0] value. + sysuint_t bit = (sysuint_t)1 << (bitpos % BITS_PER_ENTITY); + + sysuint_t cont = 0; + sysuint_t usedBlocks = (used + node->density - 1) / node->density; + + bool stop; + + // Find the first block we can mark as free. + for (;;) + { + stop = (cbits & bit) == 0; + if (stop) return true; + + if (++cont == usedBlocks) break; + + bit <<= 1; + if (bit == 0) + { + ubits = *++up; + cbits = *++cp; + bit = 1; + } + } + + // Free the tail blocks. 
+ cont = (sysuint_t)-1; + goto enterFreeLoop; + + for (;;) + { + stop = (cbits & bit) == 0; + ubits &= ~bit; +enterFreeLoop: + cbits &= ~bit; + + bit <<= 1; + cont++; + + if (stop || bit == 0) + { + *up = ubits; + *cp = cbits; + if (stop) break; + + ubits = *++up; + cbits = *++cp; + bit = 1; + } + } + + // Statistics. + cont *= node->density; + if (node->largestBlock < cont) node->largestBlock = cont; + node->used -= cont; + _used -= cont; + + return true; +} + +void MemoryManagerPrivate::freeAll(bool keepVirtualMemory) ASMJIT_NOTHROW +{ + MemNode* node = _first; + + while (node) + { + MemNode* next = node->next; + + if (!keepVirtualMemory) freeVirtualMemory(node->mem, node->size); + ASMJIT_FREE(node->baUsed); + ASMJIT_FREE(node); + + node = next; + } + + _allocated = 0; + _used = 0; + + _root = NULL; + _first = NULL; + _last = NULL; + _optimal = NULL; +} + +// ============================================================================ +// [AsmJit::MemoryManagerPrivate - NodeList RB-Tree] +// ============================================================================ + +static int rbAssert(MemNode* root) +{ + if (root == NULL) return 1; + + MemNode* ln = root->node[0]; + MemNode* rn = root->node[1]; + + // Red violation. + ASMJIT_ASSERT( !(isRed(root) && (isRed(ln) || isRed(rn))) ); + + int lh = rbAssert(ln); + int rh = rbAssert(rn); + + // Invalid btree. + ASMJIT_ASSERT(ln == NULL || ln->mem < root->mem); + ASMJIT_ASSERT(rn == NULL || rn->mem > root->mem); + + // Black violation. + ASMJIT_ASSERT( !(lh != 0 && rh != 0 && lh != rh) ); + + // Only count black links. + if (lh != 0 && rh != 0) + return isRed(root) ? 
lh : lh + 1; + else + return 0; +} + +static inline MemNode* rbRotateSingle(MemNode* root, int dir) +{ + MemNode* save = root->node[!dir]; + + root->node[!dir] = save->node[dir]; + save->node[dir] = root; + + root->red = 1; + save->red = 0; + + return save; +} + +static inline MemNode* rbRotateDouble(MemNode* root, int dir) +{ + root->node[!dir] = rbRotateSingle(root->node[!dir], !dir); + return rbRotateSingle(root, dir); +} + +bool MemoryManagerPrivate::checkTree() ASMJIT_NOTHROW +{ + return rbAssert(_root) > 0; +} + +void MemoryManagerPrivate::insertNode(MemNode* node) ASMJIT_NOTHROW +{ + if (_root == NULL) + { + // Empty tree case. + _root = node; + } + else + { + // False tree root. + RbNode<MemNode> head = {0}; + + // Grandparent & parent. + MemNode* g = NULL; + MemNode* t = reinterpret_cast<MemNode*>(&head); + + // Iterator & parent. + MemNode* p = NULL; + MemNode* q = t->node[1] = _root; + + int dir = 0, last; + + // Search down the tree. + for (;;) + { + if (q == NULL) + { + // Insert new node at the bottom. + q = node; + p->node[dir] = node; + } + else if (isRed(q->node[0]) && isRed(q->node[1])) + { + // Color flip. + q->red = 1; + q->node[0]->red = 0; + q->node[1]->red = 0; + } + + // Fix red violation. + if (isRed(q) && isRed(p)) + { + int dir2 = t->node[1] == g; + t->node[dir2] = (q == p->node[last]) ? rbRotateSingle(g, !last) : rbRotateDouble(g, !last); + } + + // Stop if found. + if (q == node) break; + + last = dir; + dir = q->mem < node->mem; + + // Update helpers. + if (g != NULL) t = g; + g = p; + p = q; + q = q->node[dir]; + } + + // Update root. + _root = head.node[1]; + } + + // Make root black. + _root->red = 0; + + // Link with others. + node->prev = _last; + + if (_first == NULL) + { + _first = node; + _last = node; + _optimal = node; + } + else + { + node->prev = _last; + _last->next = node; + _last = node; + } +} + +MemNode* MemoryManagerPrivate::removeNode(MemNode* node) ASMJIT_NOTHROW +{ + // False tree root. 
+ RbNode<MemNode> head = {0}; + + // Helpers. + MemNode* q = reinterpret_cast<MemNode*>(&head); + MemNode* p = NULL; + MemNode* g = NULL; + // Found item. + MemNode* f = NULL; + int dir = 1; + + // Set up. + q->node[1] = _root; + + // Search and push a red down. + while (q->node[dir] != NULL) + { + int last = dir; + + // Update helpers. + g = p; + p = q; + q = q->node[dir]; + dir = q->mem < node->mem; + + // Save found node. + if (q == node) f = q; + + // Push the red node down. + if (!isRed(q) && !isRed(q->node[dir])) + { + if (isRed(q->node[!dir])) + { + p = p->node[last] = rbRotateSingle(q, dir); + } + else if (!isRed(q->node[!dir])) + { + MemNode* s = p->node[!last]; + + if (s != NULL) + { + if (!isRed(s->node[!last]) && !isRed(s->node[last])) + { + // Color flip. + p->red = 0; + s->red = 1; + q->red = 1; + } + else + { + int dir2 = g->node[1] == p; + + if (isRed(s->node[last])) + g->node[dir2] = rbRotateDouble(p, last); + else if (isRed(s->node[!last])) + g->node[dir2] = rbRotateSingle(p, last); + + // Ensure correct coloring. + q->red = g->node[dir2]->red = 1; + g->node[dir2]->node[0]->red = 0; + g->node[dir2]->node[1]->red = 0; + } + } + } + } + } + + // Replace and remove. + ASMJIT_ASSERT(f != NULL); + ASMJIT_ASSERT(f != reinterpret_cast<MemNode*>(&head)); + ASMJIT_ASSERT(q != reinterpret_cast<MemNode*>(&head)); + + if (f != q) f->fillData(q); + p->node[p->node[1] == q] = q->node[q->node[0] == NULL]; + + // Update root and make it black. + if ((_root = head.node[1]) != NULL) _root->red = 0; + + // Unlink. + MemNode* next = q->next; + MemNode* prev = q->prev; + + if (prev) { prev->next = next; } else { _first = next; } + if (next) { next->prev = prev; } else { _last = prev; } + if (_optimal == q) { _optimal = prev ? prev : next; } + + return q; +} + +MemNode* MemoryManagerPrivate::findPtr(uint8_t* mem) ASMJIT_NOTHROW +{ + MemNode* cur = _root; + while (cur) + { + uint8_t* curMem = cur->mem; + if (mem < curMem) + { + // Go left. 
+ cur = cur->node[0]; + continue; + } + else + { + uint8_t* curEnd = curMem + cur->size; + if (mem >= curEnd) + { + // Go right. + cur = cur->node[1]; + continue; + } + else + { + // Match. + break; + } + } + } + return cur; +} + +// ============================================================================ +// [AsmJit::MemoryManager] +// ============================================================================ + +MemoryManager::MemoryManager() ASMJIT_NOTHROW +{ +} + +MemoryManager::~MemoryManager() ASMJIT_NOTHROW +{ +} + +MemoryManager* MemoryManager::getGlobal() ASMJIT_NOTHROW +{ + static VirtualMemoryManager memmgr; + return &memmgr; +} + +// ============================================================================ +// [AsmJit::VirtualMemoryManager] +// ============================================================================ + +#if !defined(ASMJIT_WINDOWS) +VirtualMemoryManager::VirtualMemoryManager() ASMJIT_NOTHROW +{ + MemoryManagerPrivate* d = new(std::nothrow) MemoryManagerPrivate(); + _d = (void*)d; +} +#else +VirtualMemoryManager::VirtualMemoryManager() ASMJIT_NOTHROW +{ + MemoryManagerPrivate* d = new(std::nothrow) MemoryManagerPrivate(GetCurrentProcess()); + _d = (void*)d; +} + +VirtualMemoryManager::VirtualMemoryManager(HANDLE hProcess) ASMJIT_NOTHROW +{ + MemoryManagerPrivate* d = new(std::nothrow) MemoryManagerPrivate(hProcess); + _d = (void*)d; +} +#endif // ASMJIT_WINDOWS + +VirtualMemoryManager::~VirtualMemoryManager() ASMJIT_NOTHROW +{ + MemoryManagerPrivate* d = reinterpret_cast<MemoryManagerPrivate*>(_d); + delete d; +} + +void* VirtualMemoryManager::alloc(sysuint_t size, uint32_t type) ASMJIT_NOTHROW +{ + MemoryManagerPrivate* d = reinterpret_cast<MemoryManagerPrivate*>(_d); + + if (type == MEMORY_ALLOC_PERMANENT) + return d->allocPermanent(size); + else + return d->allocFreeable(size); +} + +bool VirtualMemoryManager::free(void* address) ASMJIT_NOTHROW +{ + MemoryManagerPrivate* d = reinterpret_cast<MemoryManagerPrivate*>(_d); + 
return d->free(address); +} + +bool VirtualMemoryManager::shrink(void* address, sysuint_t used) ASMJIT_NOTHROW +{ + MemoryManagerPrivate* d = reinterpret_cast<MemoryManagerPrivate*>(_d); + return d->shrink(address, used); +} + +void VirtualMemoryManager::freeAll() ASMJIT_NOTHROW +{ + MemoryManagerPrivate* d = reinterpret_cast<MemoryManagerPrivate*>(_d); + + // Calling MemoryManager::freeAll() will never keep allocated memory. + return d->freeAll(false); +} + +sysuint_t VirtualMemoryManager::getUsedBytes() ASMJIT_NOTHROW +{ + MemoryManagerPrivate* d = reinterpret_cast<MemoryManagerPrivate*>(_d); + return d->_used; +} + +sysuint_t VirtualMemoryManager::getAllocatedBytes() ASMJIT_NOTHROW +{ + MemoryManagerPrivate* d = reinterpret_cast<MemoryManagerPrivate*>(_d); + return d->_allocated; +} + +bool VirtualMemoryManager::getKeepVirtualMemory() const ASMJIT_NOTHROW +{ + MemoryManagerPrivate* d = reinterpret_cast<MemoryManagerPrivate*>(_d); + return d->_keepVirtualMemory; +} + +void VirtualMemoryManager::setKeepVirtualMemory(bool keepVirtualMemory) ASMJIT_NOTHROW +{ + MemoryManagerPrivate* d = reinterpret_cast<MemoryManagerPrivate*>(_d); + d->_keepVirtualMemory = keepVirtualMemory; +} + +// ============================================================================ +// [AsmJit::VirtualMemoryManager - Debug] +// ============================================================================ + +#if defined(ASMJIT_MEMORY_MANAGER_DUMP) + +struct ASMJIT_HIDDEN GraphVizContext +{ + GraphVizContext(); + ~GraphVizContext(); + + bool openFile(const char* fileName); + void closeFile(); + + void dumpNode(MemNode* node); + void connect(MemNode* node, MemNode* other, const char* dst); + + FILE* file; +}; + +GraphVizContext::GraphVizContext() : + file(NULL) +{ +} + +GraphVizContext::~GraphVizContext() +{ + closeFile(); +} + +bool GraphVizContext::openFile(const char* fileName) +{ + file = fopen(fileName, "w"); + return file != NULL; +} + +void GraphVizContext::closeFile() +{ + if (file) { 
fclose(file); file = NULL; } +} + +void GraphVizContext::dumpNode(MemNode* node) +{ + fprintf(file, " NODE_%p [shape=record, style=filled, color=%s, label=\"<L>|<C>Mem: %p, Used: %d/%d|<R>\"];\n", + node, + node->red ? "red" : "gray", + node->mem, node->used, node->size); + + if (node->node[0]) connect(node, node->node[0], "L"); + if (node->node[1]) connect(node, node->node[1], "R"); +} + +void GraphVizContext::connect(MemNode* node, MemNode* other, const char* dst) +{ + dumpNode(other); + + fprintf(file, " NODE_%p:%s -> NODE_%p:C", node, dst, other); + if (other->red) fprintf(file, " [style=bold, color=red]"); + fprintf(file, ";\n"); +} + +void VirtualMemoryManager::dump(const char* fileName) +{ + MemoryManagerPrivate* d = reinterpret_cast<MemoryManagerPrivate*>(_d); + GraphVizContext ctx; + if (!ctx.openFile(fileName)) return; + + fprintf(ctx.file, "digraph {\n"); + if (d->_root) ctx.dumpNode(d->_root); + fprintf(ctx.file, "}\n"); +} +#endif // ASMJIT_MEMORY_MANAGER_DUMP + +} // AsmJit namespace + +// [Api-End] +#include "ApiEnd.h" diff --git a/lib/AsmJit/MemoryManager.h b/lib/AsmJit/MemoryManager.h new file mode 100644 index 0000000..03a7960 --- /dev/null +++ b/lib/AsmJit/MemoryManager.h @@ -0,0 +1,184 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Guard] +#ifndef _ASMJIT_MEMORYMANAGER_H +#define _ASMJIT_MEMORYMANAGER_H + +// [Dependencies] +#include "Build.h" +#include "Defs.h" + +// [Api-Begin] +#include "ApiBegin.h" + +// [Debug] +// #define ASMJIT_MEMORY_MANAGER_DUMP + +namespace AsmJit { + +//! @addtogroup AsmJit_MemoryManagement +//! @{ + +// ============================================================================ +// [AsmJit::MemoryManager] +// ============================================================================ + +//! @brief Virtual memory manager interface. +//! +//! This class is pure virtual. You can get default virtual memory manager using +//! @c getGlobal() method. If you want to create more memory managers with same +//! functionality as global memory manager use @c VirtualMemoryManager class. +struct ASMJIT_API MemoryManager +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create memory manager instance. + MemoryManager() ASMJIT_NOTHROW; + //! @brief Destroy memory manager instance, this means also to free all memory + //! blocks. + virtual ~MemoryManager() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Interface] + // -------------------------------------------------------------------------- + + //! @brief Allocate a @a size bytes of virtual memory. + //! + //! 
Note that if you are implementing your own virtual memory manager then you + //! can quitly ignore type of allocation. This is mainly for AsmJit to memory + //! manager that allocated memory will be never freed. + virtual void* alloc(sysuint_t size, uint32_t type = MEMORY_ALLOC_FREEABLE) ASMJIT_NOTHROW = 0; + //! @brief Free previously allocated memory at a given @a address. + virtual bool free(void* address) ASMJIT_NOTHROW = 0; + //! @brief Free some tail memory. + virtual bool shrink(void* address, sysuint_t used) ASMJIT_NOTHROW = 0; + //! @brief Free all allocated memory. + virtual void freeAll() ASMJIT_NOTHROW = 0; + + //! @brief Get how many bytes are currently used. + virtual sysuint_t getUsedBytes() ASMJIT_NOTHROW = 0; + //! @brief Get how many bytes are currently allocated. + virtual sysuint_t getAllocatedBytes() ASMJIT_NOTHROW = 0; + + //! @brief Get global memory manager instance. + //! + //! Global instance is instance of @c VirtualMemoryManager class. Global memory + //! manager is used by default by @ref Assembler::make() and @ref Compiler::make() + //! methods. + static MemoryManager* getGlobal() ASMJIT_NOTHROW; +}; + +//! @brief Reference implementation of memory manager that uses +//! @ref AsmJit::VirtualMemory class to allocate chunks of virtual memory +//! and bit arrays to manage it. +struct ASMJIT_API VirtualMemoryManager : public MemoryManager +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a @c VirtualMemoryManager instance. + VirtualMemoryManager() ASMJIT_NOTHROW; + +#if defined(ASMJIT_WINDOWS) + //! @brief Create a @c VirtualMemoryManager instance for process @a hProcess. + //! + //! This is specialized version of constructor available only for windows and + //! usable to alloc/free memory of different process. 
+ VirtualMemoryManager(HANDLE hProcess) ASMJIT_NOTHROW; +#endif // ASMJIT_WINDOWS + + //! @brief Destroy the @c VirtualMemoryManager instance, this means also to + //! free all blocks. + virtual ~VirtualMemoryManager() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Interface] + // -------------------------------------------------------------------------- + + virtual void* alloc(sysuint_t size, uint32_t type = MEMORY_ALLOC_FREEABLE) ASMJIT_NOTHROW; + virtual bool free(void* address) ASMJIT_NOTHROW; + virtual bool shrink(void* address, sysuint_t used) ASMJIT_NOTHROW; + virtual void freeAll() ASMJIT_NOTHROW; + + virtual sysuint_t getUsedBytes() ASMJIT_NOTHROW; + virtual sysuint_t getAllocatedBytes() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Virtual Memory Manager Specific] + // -------------------------------------------------------------------------- + + //! @brief Get whether to keep allocated memory after memory manager is + //! destroyed. + //! + //! @sa @c setKeepVirtualMemory(). + bool getKeepVirtualMemory() const ASMJIT_NOTHROW; + + //! @brief Set whether to keep allocated memory after memory manager is + //! destroyed. + //! + //! This method is usable when patching code of remote process. You need to + //! allocate process memory, store generated assembler into it and patch the + //! method you want to redirect (into your code). This method affects only + //! VirtualMemoryManager destructor. After destruction all internal + //! structures are freed, only the process virtual memory remains. + //! + //! @note Memory allocated with MEMORY_ALLOC_PERMANENT is always kept. + //! + //! @sa @c getKeepVirtualMemory(). 
+ void setKeepVirtualMemory(bool keepVirtualMemory) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Debug] + // -------------------------------------------------------------------------- + +#if defined(ASMJIT_MEMORY_MANAGER_DUMP) + //! @brief Dump memory manager tree into file. + //! + //! Generated output is using DOT language (from graphviz package). + void dump(const char* fileName); +#endif // ASMJIT_MEMORY_MANAGER_DUMP + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +protected: + //! @brief Pointer to private data hidden from the public API. + void* _d; +}; + +//! @} + +} // AsmJit namespace + +// [Api-End] +#include "ApiEnd.h" + +// [Guard] +#endif // _ASMJIT_MEMORYMANAGER_H diff --git a/lib/AsmJit/Operand.cpp b/lib/AsmJit/Operand.cpp new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/lib/AsmJit/Operand.cpp diff --git a/lib/AsmJit/Operand.h b/lib/AsmJit/Operand.h new file mode 100644 index 0000000..a3178e9 --- /dev/null +++ b/lib/AsmJit/Operand.h @@ -0,0 +1,51 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Guard] +#ifndef _ASMJIT_OPERAND_H +#define _ASMJIT_OPERAND_H + +// [Dependencies] +#include "Build.h" + +namespace AsmJit { + +//! @addtogroup AsmJit_Core +//! @{ + +//! @} + +} // AsmJit namespace + +// ============================================================================ +// [Platform Specific] +// ============================================================================ + +#if defined(ASMJIT_X86) || defined(ASMJIT_X64) +#include "OperandX86X64.h" +#endif // ASMJIT_X86 || ASMJIT_X64 + +// [Guard] +#endif // _ASMJIT_OPERAND_H diff --git a/lib/AsmJit/OperandX86X64.cpp b/lib/AsmJit/OperandX86X64.cpp new file mode 100644 index 0000000..8c15d1c --- /dev/null +++ b/lib/AsmJit/OperandX86X64.cpp @@ -0,0 +1,419 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Dependencies] +#include "Defs.h" +#include "Operand.h" + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +// ============================================================================ +// [AsmJit::Registers - no_reg] +// ============================================================================ + +const GPReg no_reg(_Initialize(), INVALID_VALUE); + +// ============================================================================ +// [AsmJit::Registers - 8-bit] +// ============================================================================ + +const GPReg al(_Initialize(), REG_AL); +const GPReg cl(_Initialize(), REG_CL); +const GPReg dl(_Initialize(), REG_DL); +const GPReg bl(_Initialize(), REG_BL); + +#if defined(ASMJIT_X64) +const GPReg spl(_Initialize(), REG_SPL); +const GPReg bpl(_Initialize(), REG_BPL); +const GPReg sil(_Initialize(), REG_SIL); +const GPReg dil(_Initialize(), REG_DIL); + +const GPReg r8b(_Initialize(), REG_R8B); +const GPReg r9b(_Initialize(), REG_R9B); +const GPReg r10b(_Initialize(), REG_R10B); +const GPReg r11b(_Initialize(), REG_R11B); +const GPReg r12b(_Initialize(), REG_R12B); +const GPReg r13b(_Initialize(), REG_R13B); +const GPReg r14b(_Initialize(), REG_R14B); +const GPReg r15b(_Initialize(), REG_R15B); +#endif // ASMJIT_X64 + +const GPReg ah(_Initialize(), REG_AH); +const GPReg ch(_Initialize(), REG_CH); +const GPReg dh(_Initialize(), REG_DH); +const GPReg bh(_Initialize(), REG_BH); + +// 
============================================================================ +// [AsmJit::Registers - 16-bit] +// ============================================================================ + +const GPReg ax(_Initialize(), REG_AX); +const GPReg cx(_Initialize(), REG_CX); +const GPReg dx(_Initialize(), REG_DX); +const GPReg bx(_Initialize(), REG_BX); +const GPReg sp(_Initialize(), REG_SP); +const GPReg bp(_Initialize(), REG_BP); +const GPReg si(_Initialize(), REG_SI); +const GPReg di(_Initialize(), REG_DI); + +#if defined(ASMJIT_X64) +const GPReg r8w(_Initialize(), REG_R8W); +const GPReg r9w(_Initialize(), REG_R9W); +const GPReg r10w(_Initialize(), REG_R10W); +const GPReg r11w(_Initialize(), REG_R11W); +const GPReg r12w(_Initialize(), REG_R12W); +const GPReg r13w(_Initialize(), REG_R13W); +const GPReg r14w(_Initialize(), REG_R14W); +const GPReg r15w(_Initialize(), REG_R15W); +#endif // ASMJIT_X64 + +// ============================================================================ +// [AsmJit::Registers - 32-bit] +// ============================================================================ + +const GPReg eax(_Initialize(), REG_EAX); +const GPReg ecx(_Initialize(), REG_ECX); +const GPReg edx(_Initialize(), REG_EDX); +const GPReg ebx(_Initialize(), REG_EBX); +const GPReg esp(_Initialize(), REG_ESP); +const GPReg ebp(_Initialize(), REG_EBP); +const GPReg esi(_Initialize(), REG_ESI); +const GPReg edi(_Initialize(), REG_EDI); + +#if defined(ASMJIT_X64) +const GPReg r8d(_Initialize(), REG_R8D); +const GPReg r9d(_Initialize(), REG_R9D); +const GPReg r10d(_Initialize(), REG_R10D); +const GPReg r11d(_Initialize(), REG_R11D); +const GPReg r12d(_Initialize(), REG_R12D); +const GPReg r13d(_Initialize(), REG_R13D); +const GPReg r14d(_Initialize(), REG_R14D); +const GPReg r15d(_Initialize(), REG_R15D); +#endif // ASMJIT_X64 + +// ============================================================================ +// [AsmJit::Registers - 64-bit] +// 
============================================================================ + +#if defined(ASMJIT_X64) +const GPReg rax(_Initialize(), REG_RAX); +const GPReg rcx(_Initialize(), REG_RCX); +const GPReg rdx(_Initialize(), REG_RDX); +const GPReg rbx(_Initialize(), REG_RBX); +const GPReg rsp(_Initialize(), REG_RSP); +const GPReg rbp(_Initialize(), REG_RBP); +const GPReg rsi(_Initialize(), REG_RSI); +const GPReg rdi(_Initialize(), REG_RDI); + +const GPReg r8(_Initialize(), REG_R8); +const GPReg r9(_Initialize(), REG_R9); +const GPReg r10(_Initialize(), REG_R10); +const GPReg r11(_Initialize(), REG_R11); +const GPReg r12(_Initialize(), REG_R12); +const GPReg r13(_Initialize(), REG_R13); +const GPReg r14(_Initialize(), REG_R14); +const GPReg r15(_Initialize(), REG_R15); +#endif // ASMJIT_X64 + +// ============================================================================ +// [AsmJit::Registers - Native (AsmJit extension)] +// ============================================================================ + +const GPReg nax(_Initialize(), REG_NAX); +const GPReg ncx(_Initialize(), REG_NCX); +const GPReg ndx(_Initialize(), REG_NDX); +const GPReg nbx(_Initialize(), REG_NBX); +const GPReg nsp(_Initialize(), REG_NSP); +const GPReg nbp(_Initialize(), REG_NBP); +const GPReg nsi(_Initialize(), REG_NSI); +const GPReg ndi(_Initialize(), REG_NDI); + +// ============================================================================ +// [AsmJit::Registers - MM] +// ============================================================================ + +const MMReg mm0(_Initialize(), REG_MM0); +const MMReg mm1(_Initialize(), REG_MM1); +const MMReg mm2(_Initialize(), REG_MM2); +const MMReg mm3(_Initialize(), REG_MM3); +const MMReg mm4(_Initialize(), REG_MM4); +const MMReg mm5(_Initialize(), REG_MM5); +const MMReg mm6(_Initialize(), REG_MM6); +const MMReg mm7(_Initialize(), REG_MM7); + +// ============================================================================ +// [AsmJit::Registers - XMM] +// 
============================================================================ + +const XMMReg xmm0(_Initialize(), REG_XMM0); +const XMMReg xmm1(_Initialize(), REG_XMM1); +const XMMReg xmm2(_Initialize(), REG_XMM2); +const XMMReg xmm3(_Initialize(), REG_XMM3); +const XMMReg xmm4(_Initialize(), REG_XMM4); +const XMMReg xmm5(_Initialize(), REG_XMM5); +const XMMReg xmm6(_Initialize(), REG_XMM6); +const XMMReg xmm7(_Initialize(), REG_XMM7); + +#if defined(ASMJIT_X64) +const XMMReg xmm8(_Initialize(), REG_XMM8); +const XMMReg xmm9(_Initialize(), REG_XMM9); +const XMMReg xmm10(_Initialize(), REG_XMM10); +const XMMReg xmm11(_Initialize(), REG_XMM11); +const XMMReg xmm12(_Initialize(), REG_XMM12); +const XMMReg xmm13(_Initialize(), REG_XMM13); +const XMMReg xmm14(_Initialize(), REG_XMM14); +const XMMReg xmm15(_Initialize(), REG_XMM15); +#endif // ASMJIT_X64 + +// ============================================================================ +// [AsmJit::Immediate] +// ============================================================================ + +//! @brief Create signed immediate value operand. +Imm imm(sysint_t i) ASMJIT_NOTHROW +{ + return Imm(i, false); +} + +//! @brief Create unsigned immediate value operand. +Imm uimm(sysuint_t i) ASMJIT_NOTHROW +{ + return Imm((sysint_t)i, true); +} + +// ============================================================================ +// [AsmJit::BaseVar] +// ============================================================================ + +Mem _baseVarMem(const BaseVar& var, uint32_t ptrSize) ASMJIT_NOTHROW +{ + Mem m; //(_DontInitialize()); + + m._mem.op = OPERAND_MEM; + m._mem.size = ptrSize == INVALID_VALUE ? 
var.getSize() : (uint8_t)ptrSize; + m._mem.type = OPERAND_MEM_NATIVE; + m._mem.segmentPrefix = SEGMENT_NONE; + + m._mem.id = var.getId(); + + m._mem.base = INVALID_VALUE; + m._mem.index = INVALID_VALUE; + m._mem.shift = 0; + + m._mem.target = NULL; + m._mem.displacement = 0; + + return m; +} + + +Mem _baseVarMem(const BaseVar& var, uint32_t ptrSize, sysint_t disp) ASMJIT_NOTHROW +{ + Mem m; //(_DontInitialize()); + + m._mem.op = OPERAND_MEM; + m._mem.size = ptrSize == INVALID_VALUE ? var.getSize() : (uint8_t)ptrSize; + m._mem.type = OPERAND_MEM_NATIVE; + m._mem.segmentPrefix = SEGMENT_NONE; + + m._mem.id = var.getId(); + + m._mem.base = INVALID_VALUE; + m._mem.index = INVALID_VALUE; + m._mem.shift = 0; + + m._mem.target = NULL; + m._mem.displacement = disp; + + return m; +} + +Mem _baseVarMem(const BaseVar& var, uint32_t ptrSize, const GPVar& index, uint32_t shift, sysint_t disp) ASMJIT_NOTHROW +{ + Mem m; //(_DontInitialize()); + + m._mem.op = OPERAND_MEM; + m._mem.size = ptrSize == INVALID_VALUE ? 
var.getSize() : (uint8_t)ptrSize; + m._mem.type = OPERAND_MEM_NATIVE; + m._mem.segmentPrefix = SEGMENT_NONE; + + m._mem.id = var.getId(); + + m._mem.base = INVALID_VALUE; + m._mem.index = index.getId(); + m._mem.shift = shift; + + m._mem.target = NULL; + m._mem.displacement = disp; + + return m; +} + +// ============================================================================ +// [AsmJit::Mem - ptr[]] +// ============================================================================ + +Mem _MemPtrBuild( + const Label& label, sysint_t disp, uint32_t ptrSize) + ASMJIT_NOTHROW +{ + return Mem(label, disp, ptrSize); +} + +Mem _MemPtrBuild( + const Label& label, + const GPReg& index, uint32_t shift, sysint_t disp, uint32_t ptrSize) + ASMJIT_NOTHROW +{ + Mem m(label, disp, ptrSize); + + m._mem.index = index.getRegIndex(); + m._mem.shift = shift; + + return m; +} + +Mem _MemPtrBuild( + const Label& label, + const GPVar& index, uint32_t shift, sysint_t disp, uint32_t ptrSize) + ASMJIT_NOTHROW +{ + Mem m(label, disp, ptrSize); + + m._mem.index = index.getId(); + m._mem.shift = shift; + + return m; +} + +// ============================================================================ +// [AsmJit::Mem - ptr[] - Absolute Addressing] +// ============================================================================ + +ASMJIT_API Mem _MemPtrAbs( + void* target, sysint_t disp, + uint32_t segmentPrefix, uint32_t ptrSize) + ASMJIT_NOTHROW +{ + Mem m; + + m._mem.size = ptrSize; + m._mem.type = OPERAND_MEM_ABSOLUTE; + m._mem.segmentPrefix = segmentPrefix; + + m._mem.target = target; + m._mem.displacement = disp; + + return m; +} + +ASMJIT_API Mem _MemPtrAbs( + void* target, + const GPReg& index, uint32_t shift, sysint_t disp, + uint32_t segmentPrefix, uint32_t ptrSize) + ASMJIT_NOTHROW +{ + Mem m;// (_DontInitialize()); + + m._mem.op = OPERAND_MEM; + m._mem.size = ptrSize; + m._mem.type = OPERAND_MEM_ABSOLUTE; + m._mem.segmentPrefix = (uint8_t)segmentPrefix; + + m._mem.id = 
INVALID_VALUE; + + m._mem.base = INVALID_VALUE; + m._mem.index = index.getRegIndex(); + m._mem.shift = shift; + + m._mem.target = target; + m._mem.displacement = disp; + + return m; +} + +ASMJIT_API Mem _MemPtrAbs( + void* target, + const GPVar& index, uint32_t shift, sysint_t disp, + uint32_t segmentPrefix, uint32_t ptrSize) + ASMJIT_NOTHROW +{ + Mem m;// (_DontInitialize()); + + m._mem.op = OPERAND_MEM; + m._mem.size = ptrSize; + m._mem.type = OPERAND_MEM_ABSOLUTE; + m._mem.segmentPrefix = (uint8_t)segmentPrefix; + + m._mem.id = INVALID_VALUE; + + m._mem.base = INVALID_VALUE; + m._mem.index = index.getId(); + m._mem.shift = shift; + + m._mem.target = target; + m._mem.displacement = disp; + + return m; +} + +// ============================================================================ +// [AsmJit::Mem - ptr[base + displacement]] +// ============================================================================ + +Mem _MemPtrBuild( + const GPReg& base, sysint_t disp, uint32_t ptrSize) + ASMJIT_NOTHROW +{ + return Mem(base, disp, ptrSize); +} + +Mem _MemPtrBuild( + const GPReg& base, + const GPReg& index, uint32_t shift, sysint_t disp, uint32_t ptrSize) + ASMJIT_NOTHROW +{ + return Mem(base, index, shift, disp, ptrSize); +} + +Mem _MemPtrBuild( + const GPVar& base, sysint_t disp, uint32_t ptrSize) + ASMJIT_NOTHROW +{ + return Mem(base, disp, ptrSize); +} + +Mem _MemPtrBuild( + const GPVar& base, + const GPVar& index, uint32_t shift, sysint_t disp, uint32_t ptrSize) + ASMJIT_NOTHROW +{ + return Mem(base, index, shift, disp, ptrSize); +} + +} // AsmJit namespace diff --git a/lib/AsmJit/OperandX86X64.h b/lib/AsmJit/OperandX86X64.h new file mode 100644 index 0000000..933a322 --- /dev/null +++ b/lib/AsmJit/OperandX86X64.h @@ -0,0 +1,2298 @@ +// AsmJit - Complete JIT Assembler for C++ Language. 
+ +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Guard] +#ifndef _ASMJIT_OPERANDX86X64_H +#define _ASMJIT_OPERANDX86X64_H + +#if !defined(_ASMJIT_OPERAND_H) +#warning "AsmJit/OperandX86X64.h can be only included by AsmJit/Operand.h" +#endif // _ASMJIT_OPERAND_H + +// [Dependencies] +#include "Build.h" +#include "Defs.h" + +namespace AsmJit { + +// ============================================================================ +// [AsmJit::Forward Declarations] +// ============================================================================ + +struct BaseReg; +struct BaseVar; +struct Compiler; +struct GPReg; +struct GPVar; +struct Imm; +struct Label; +struct Mem; +struct MMReg; +struct MMVar; +struct Operand; +struct X87Reg; +struct X87Var; +struct XMMReg; +struct XMMVar; + +//! @addtogroup AsmJit_Core +//! 
@{ + +// ============================================================================ +// [AsmJit::Operand] +// ============================================================================ + +//! @brief Operand, abstract class for register, memory location and immediate +//! value operands. +struct ASMJIT_HIDDEN Operand +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create an uninitialized operand. + inline Operand() ASMJIT_NOTHROW + { + memset(this, 0, sizeof(Operand)); + _base.id = INVALID_VALUE; + } + + //! @brief Create a reference to @a other operand. + inline Operand(const Operand& other) ASMJIT_NOTHROW + { + _init(other); + } + +#if !defined(ASMJIT_NODOC) + inline Operand(const _DontInitialize&) ASMJIT_NOTHROW + { + } +#endif // ASMJIT_NODOC + + // -------------------------------------------------------------------------- + // [Init & Copy] + // -------------------------------------------------------------------------- + + //! @internal + //! + //! @brief Initialize operand to @a other (used by constructors). + inline void _init(const Operand& other) ASMJIT_NOTHROW { memcpy(this, &other, sizeof(Operand)); } + //! @internal + //! + //! @brief Initialize operand to @a other (used by assign operators). + inline void _copy(const Operand& other) ASMJIT_NOTHROW { memcpy(this, &other, sizeof(Operand)); } + + // -------------------------------------------------------------------------- + // [Identification] + // -------------------------------------------------------------------------- + + //! @brief Get type of operand, see @c OPERAND_TYPE. + inline uint32_t getType() const ASMJIT_NOTHROW + { return _base.op; } + + //! @brief Get whether operand is none (@c OPERAND_NONE). + inline bool isNone() const ASMJIT_NOTHROW + { return (_base.op == OPERAND_NONE); } + + //! 
@brief Get whether operand is any (general purpose, mmx or sse) register (@c OPERAND_REG). + inline bool isReg() const ASMJIT_NOTHROW + { return (_base.op == OPERAND_REG); } + + //! @brief Get whether operand is memory address (@c OPERAND_MEM). + inline bool isMem() const ASMJIT_NOTHROW + { return (_base.op == OPERAND_MEM); } + + //! @brief Get whether operand is immediate (@c OPERAND_IMM). + inline bool isImm() const ASMJIT_NOTHROW + { return (_base.op == OPERAND_IMM); } + + //! @brief Get whether operand is label (@c OPERAND_LABEL). + inline bool isLabel() const ASMJIT_NOTHROW + { return (_base.op == OPERAND_LABEL); } + + //! @brief Get whether operand is variable (@c OPERAND_VAR). + inline bool isVar() const ASMJIT_NOTHROW + { return (_base.op == OPERAND_VAR); } + + //! @brief Get whether operand is variable or memory. + inline bool isVarMem() const ASMJIT_NOTHROW + { return ((_base.op & (OPERAND_VAR | OPERAND_MEM)) != 0); } + + //! @brief Get whether operand is register and type of register is @a regType. + inline bool isRegType(uint32_t regType) const ASMJIT_NOTHROW + { return (_base.op == OPERAND_REG) & ((_reg.code & REG_TYPE_MASK) == regType); } + + //! @brief Get whether operand is register and code of register is @a regCode. + inline bool isRegCode(uint32_t regCode) const ASMJIT_NOTHROW + { return (_base.op == OPERAND_REG) & (_reg.code == regCode); } + + //! @brief Get whether operand is register and index of register is @a regIndex. + inline bool isRegIndex(uint32_t regIndex) const ASMJIT_NOTHROW + { return (_base.op == OPERAND_REG) & ((_reg.code & REG_INDEX_MASK) == (regIndex & REG_INDEX_MASK)); } + + //! @brief Get whether operand is any register or memory. + inline bool isRegMem() const ASMJIT_NOTHROW + { return ((_base.op & (OPERAND_REG | OPERAND_MEM)) != 0); } + + //! @brief Get whether operand is register of @a regType type or memory. 
+ inline bool isRegTypeMem(uint32_t regType) const ASMJIT_NOTHROW + { return ((_base.op == OPERAND_REG) & ((_reg.code & REG_TYPE_MASK) == regType)) | (_base.op == OPERAND_MEM); } + + // -------------------------------------------------------------------------- + // [Operand Size] + // -------------------------------------------------------------------------- + + //! @brief Return size of operand in bytes. + inline uint32_t getSize() const ASMJIT_NOTHROW + { return _base.size; } + + // -------------------------------------------------------------------------- + // [Operand Id] + // -------------------------------------------------------------------------- + + //! @brief Return operand Id (Operand Id's are used internally by + //! @c Assembler and @c Compiler classes). + //! + //! @note There is no way how to change or remove operand id. If you don't + //! need the operand just assign different operand to this one. + inline uint32_t getId() const ASMJIT_NOTHROW + { return _base.id; } + + // -------------------------------------------------------------------------- + // [Extensions] + // -------------------------------------------------------------------------- + + //! @brief Get whether the extended register (additional eight registers + //! introduced by 64-bit mode) is used. + inline bool isExtendedRegisterUsed() const ASMJIT_NOTHROW + { + // Hacky, but correct. + // - If operand type is register then extended register is register with + // index 8 and greater (8 to 15 inclusive). + // - If operand type is memory operand then we need to take care about + // label (in _mem.base) and INVALID_VALUE, we just decrement the value + // by 8 and check if it's at interval 0 to 7 inclusive (if it's there + // then it's extended register. 
+ return (isReg() && (_reg.code & REG_INDEX_MASK) >= 8) || + (isMem() && ((((uint32_t)_mem.base - 8U) < 8U) || + (((uint32_t)_mem.index - 8U) < 8U) )); + } + + // -------------------------------------------------------------------------- + // [Data Structures] + // -------------------------------------------------------------------------- + + //! @internal + //! + //! @brief Base operand data shared between all operands. + struct BaseData + { + //! @brief Type of operand, see @c OPERAND_TYPE. + uint8_t op; + //! @brief Size of operand (register, address, immediate or variable). + uint8_t size; + + //! @brief Not used. + uint8_t reserved[2]; + + //! @brief Operand id (private variable for @c Assembler and @c Compiler classes). + //! + //! @note Uninitialized operands has id equal to zero. + uint32_t id; + }; + + //! @internal + //! + //! @brief Register data. + struct RegData + { + //! @brief Type of operand, see @c OPERAND_TYPE (in this case @c OPERAND_REG). + uint8_t op; + //! @brief Size of register. + uint8_t size; + + //! @brief Not used. + uint8_t reserved[2]; + + //! @brief Operand id. + uint32_t id; + + //! @brief Register code or variable, see @c REG and @c INVALID_VALUE. + uint32_t code; + }; + + //! @internal + //! + //! @brief Memory address data. + struct MemData + { + //! @brief Type of operand, see @c OPERAND_TYPE (in this case @c OPERAND_MEM). + uint8_t op; + //! @brief Size of pointer. + uint8_t size; + + //! @brief Memory operand type, see @c OPERAND_MEM_TYPE. + uint8_t type; + //! @brief Segment override prefix, see @c SEGMENT_PREFIX. + uint8_t segmentPrefix : 4; + //! @brief Index register shift (0 to 3 inclusive). + uint8_t shift : 4; + + //! @brief Operand ID. + uint32_t id; + + //! @brief Base register index, variable or label id. + uint32_t base; + //! @brief Index register index or variable. + uint32_t index; + + //! @brief Target (for 32-bit, absolute address). + void* target; + + //! @brief Displacement. 
+ sysint_t displacement; + }; + + //! @internal + //! + //! @brief Immediate value data. + struct ImmData + { + //! @brief Type of operand, see @c OPERAND_TYPE (in this case @c OPERAND_IMM).. + uint8_t op; + //! @brief Size of immediate (or 0 to autodetect). + uint8_t size; + + //! @brief @c true if immediate is unsigned. + uint8_t isUnsigned; + //! @brief Not used. + uint8_t reserved; + + //! @brief Operand ID. + uint32_t id; + + //! @brief Immediate value. + sysint_t value; + }; + + //! @internal + //! + //! @brief Label data. + struct LblData + { + //! @brief Type of operand, see @c OPERAND_TYPE (in this case @c OPERAND_LABEL). + uint8_t op; + //! @brief Size of label, currently not used. + uint8_t size; + + //! @brief Not used. + uint8_t reserved[2]; + + //! @brief Operand ID. + uint32_t id; + }; + + //! @internal + //! + //! @brief Variable data. + struct VarData + { + //! @brief Type of operand, see @c OPERAND_TYPE (in this case @c OPERAND_VAR). + uint8_t op; + //! @brief Size of variable (0 if don't known). + uint8_t size; + + //! @brief Not used. + uint8_t reserved[2]; + + //! @brief Operand ID. + uint32_t id; + + //! @brief Type (and later also code) of register, see @c REG_TYPE, @c REG_CODE. + //! + //! @note Register code and variable code are two different things. In most + //! cases registerCode is very related to variableType, but general purpose + //! registers are divided to 64-bit, 32-bit, 16-bit and 8-bit entities so + //! the registerCode can be used to access these, variableType remains + //! unchanged from the initialization state. Variable type describes mainly + //! variable type and home memory size. + uint32_t registerCode; + + //! @brief Type of variable. See @c VARIABLE_TYPE enum. + uint32_t variableType; + }; + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + union + { + //! @brief Generic operand data. 
+ BaseData _base; + //! @brief Register operand data. + RegData _reg; + //! @brief Memory operand data. + MemData _mem; + //! @brief Immediate operand data. + ImmData _imm; + //! @brief Label data. + LblData _lbl; + //! @brief Variable data. + VarData _var; + }; +}; + +// ============================================================================ +// [AsmJit::BaseReg] +// ============================================================================ + +//! @brief Base class for all registers. +struct ASMJIT_HIDDEN BaseReg : public Operand +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a new base register. + inline BaseReg(uint32_t code, uint32_t size) ASMJIT_NOTHROW : + Operand(_DontInitialize()) + { + _reg.op = OPERAND_REG; + _reg.size = (uint8_t)size; + _reg.id = INVALID_VALUE; + _reg.code = code; + } + + //! @brief Create a new reference to @a other. + inline BaseReg(const BaseReg& other) ASMJIT_NOTHROW : + Operand(other) + {} + +#if !defined(ASMJIT_NODOC) + inline BaseReg(const _DontInitialize& dontInitialize) ASMJIT_NOTHROW : + Operand(dontInitialize) + {} +#endif // ASMJIT_NODOC + + // -------------------------------------------------------------------------- + // [BaseReg Specific] + // -------------------------------------------------------------------------- + + //! @brief Get register code, see @c REG. + inline uint32_t getRegCode() const ASMJIT_NOTHROW + { return (uint32_t)(_reg.code); } + + //! @brief Get register type, see @c REG. + inline uint32_t getRegType() const ASMJIT_NOTHROW + { return (uint32_t)(_reg.code & REG_TYPE_MASK); } + + //! @brief Get register index (value from 0 to 7/15). + inline uint32_t getRegIndex() const ASMJIT_NOTHROW + { return (uint32_t)(_reg.code & REG_INDEX_MASK); } + + //! @brief Get whether register code is equal to @a code. 
+ inline bool isRegCode(uint32_t code) const ASMJIT_NOTHROW + { return _reg.code == code; } + + //! @brief Get whether register code is equal to @a type. + inline bool isRegType(uint32_t type) const ASMJIT_NOTHROW + { return (uint32_t)(_reg.code & REG_TYPE_MASK) == type; } + + //! @brief Get whether register index is equal to @a index. + inline bool isRegIndex(uint32_t index) const ASMJIT_NOTHROW + { return (uint32_t)(_reg.code & REG_INDEX_MASK) == index; } + + //! @brief Set register code to @a code. + inline void setCode(uint32_t code) ASMJIT_NOTHROW + { _reg.code = code; } + + //! @brief Set register size to @a size. + inline void setSize(uint32_t size) ASMJIT_NOTHROW + { _reg.size = (uint8_t)size; } + + // -------------------------------------------------------------------------- + // [Overloaded Operators] + // -------------------------------------------------------------------------- + + inline BaseReg& operator=(const BaseReg& other) ASMJIT_NOTHROW + { _copy(other); return *this; } + + inline bool operator==(const BaseReg& other) const ASMJIT_NOTHROW + { return getRegCode() == other.getRegCode(); } + + inline bool operator!=(const BaseReg& other) const ASMJIT_NOTHROW + { return getRegCode() != other.getRegCode(); } +}; + +// ============================================================================ +// [AsmJit::Reg] +// ============================================================================ + +//! @brief General purpose register. +//! +//! This class is for all general purpose registers (64, 32, 16 and 8-bit). +struct ASMJIT_HIDDEN GPReg : public BaseReg +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create non-initialized general purpose register. + inline GPReg() ASMJIT_NOTHROW : + BaseReg(INVALID_VALUE, 0) {} + + //! @brief Create a reference to @a other general purpose register. 
+ inline GPReg(const GPReg& other) ASMJIT_NOTHROW : + BaseReg(other) {} + +#if !defined(ASMJIT_NODOC) + inline GPReg(const _DontInitialize& dontInitialize) ASMJIT_NOTHROW : + BaseReg(dontInitialize) {} + + inline GPReg(const _Initialize&, uint32_t code) ASMJIT_NOTHROW : + BaseReg(code, static_cast<uint32_t>(1U << ((code & REG_TYPE_MASK) >> 12))) {} +#endif // ASMJIT_NODOC + + // -------------------------------------------------------------------------- + // [GPReg Specific] + // -------------------------------------------------------------------------- + + //! @brief Get whether the general purpose register is BYTE (8-bit) type. + inline bool isGPB() const ASMJIT_NOTHROW { return (_reg.code & REG_TYPE_MASK) <= REG_TYPE_GPB_HI; } + //! @brief Get whether the general purpose register is LO-BYTE (8-bit) type. + inline bool isGPBLo() const ASMJIT_NOTHROW { return (_reg.code & REG_TYPE_MASK) == REG_TYPE_GPB_LO; } + //! @brief Get whether the general purpose register is HI-BYTE (8-bit) type. + inline bool isGPBHi() const ASMJIT_NOTHROW { return (_reg.code & REG_TYPE_MASK) == REG_TYPE_GPB_HI; } + + //! @brief Get whether the general purpose register is WORD (16-bit) type. + inline bool isGPW() const ASMJIT_NOTHROW { return (_reg.code & REG_TYPE_MASK) == REG_TYPE_GPW; } + //! @brief Get whether the general purpose register is DWORD (32-bit) type. + //! + //! This is default type for 32-bit platforms. + inline bool isGPD() const ASMJIT_NOTHROW { return (_reg.code & REG_TYPE_MASK) == REG_TYPE_GPD; } + //! @brief Get whether the general purpose register is QWORD (64-bit) type. + //! + //! This is default type for 64-bit platforms. 
+ inline bool isGPQ() const ASMJIT_NOTHROW { return (_reg.code & REG_TYPE_MASK) == REG_TYPE_GPQ; } + + // -------------------------------------------------------------------------- + // [Overloaded Operators] + // -------------------------------------------------------------------------- + +#if !defined(ASMJIT_NODOC) + inline GPReg& operator=(const GPReg& other) ASMJIT_NOTHROW { _copy(other); return *this; } + inline bool operator==(const GPReg& other) const ASMJIT_NOTHROW { return getRegCode() == other.getRegCode(); } + inline bool operator!=(const GPReg& other) const ASMJIT_NOTHROW { return getRegCode() != other.getRegCode(); } +#endif // ASMJIT_NODOC +}; + +// ============================================================================ +// [AsmJit::X87Register] +// ============================================================================ + +//! @brief 80-bit x87 floating point register. +//! +//! To create instance of x87 register, use @c st() function. +struct ASMJIT_HIDDEN X87Reg : public BaseReg +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create non-initialized x87 register. + inline X87Reg() ASMJIT_NOTHROW : + BaseReg(INVALID_VALUE, 10) {} + + //! @brief Create a reference to @a other x87 register. 
+ inline X87Reg(const X87Reg& other) ASMJIT_NOTHROW : + BaseReg(other) {} + +#if !defined(ASMJIT_NODOC) + inline X87Reg(const _DontInitialize& dontInitialize) ASMJIT_NOTHROW : + BaseReg(dontInitialize) {} + + inline X87Reg(const _Initialize&, uint32_t code) ASMJIT_NOTHROW : + BaseReg(code | REG_TYPE_X87, 10) {} +#endif // ASMJIT_NODOC + + // -------------------------------------------------------------------------- + // [Overloaded Operators] + // -------------------------------------------------------------------------- + +#if !defined(ASMJIT_NODOC) + inline X87Reg& operator=(const X87Reg& other) ASMJIT_NOTHROW { _copy(other); return *this; } + inline bool operator==(const X87Reg& other) const ASMJIT_NOTHROW { return getRegCode() == other.getRegCode(); } + inline bool operator!=(const X87Reg& other) const ASMJIT_NOTHROW { return getRegCode() != other.getRegCode(); } +#endif // ASMJIT_NODOC +}; + +// ============================================================================ +// [AsmJit::MMReg] +// ============================================================================ + +//! @brief 64-bit MMX register. +struct ASMJIT_HIDDEN MMReg : public BaseReg +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create non-initialized MM register. + inline MMReg() ASMJIT_NOTHROW : + BaseReg(INVALID_VALUE, 8) {} + + //! @brief Create a reference to @a other MM register. 
+ inline MMReg(const MMReg& other) ASMJIT_NOTHROW : + BaseReg(other) {} + +#if !defined(ASMJIT_NODOC) + inline MMReg(const _DontInitialize& dontInitialize) ASMJIT_NOTHROW : + BaseReg(dontInitialize) {} + + inline MMReg(const _Initialize&, uint32_t code) ASMJIT_NOTHROW : + BaseReg(code, 8) {} +#endif // ASMJIT_NODOC + + // -------------------------------------------------------------------------- + // [Overloaded Operators] + // -------------------------------------------------------------------------- + +#if !defined(ASMJIT_NODOC) + inline MMReg& operator=(const MMReg& other) ASMJIT_NOTHROW { _copy(other); return *this; } + inline bool operator==(const MMReg& other) const ASMJIT_NOTHROW { return getRegCode() == other.getRegCode(); } + inline bool operator!=(const MMReg& other) const ASMJIT_NOTHROW { return getRegCode() != other.getRegCode(); } +#endif // ASMJIT_NODOC +}; + +// ============================================================================ +// [AsmJit::XMMReg] +// ============================================================================ + +//! @brief 128-bit SSE register. +struct ASMJIT_HIDDEN XMMReg : public BaseReg +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create non-initialized XMM register. + inline XMMReg() ASMJIT_NOTHROW : + BaseReg(INVALID_VALUE, 16) {} + + //! @brief Create a reference to @a other XMM register. 
+ inline XMMReg(const _Initialize&, uint32_t code) ASMJIT_NOTHROW : + BaseReg(code, 16) {} + +#if !defined(ASMJIT_NODOC) + inline XMMReg(const _DontInitialize& dontInitialize) ASMJIT_NOTHROW : + BaseReg(dontInitialize) {} + + inline XMMReg(const XMMReg& other) ASMJIT_NOTHROW : + BaseReg(other) {} +#endif // ASMJIT_NODOC + + // -------------------------------------------------------------------------- + // [Overloaded Operators] + // -------------------------------------------------------------------------- + +#if !defined(ASMJIT_NODOC) + inline XMMReg& operator=(const XMMReg& other) ASMJIT_NOTHROW { _copy(other); return *this; } + inline bool operator==(const XMMReg& other) const ASMJIT_NOTHROW { return getRegCode() == other.getRegCode(); } + inline bool operator!=(const XMMReg& other) const ASMJIT_NOTHROW { return getRegCode() != other.getRegCode(); } +#endif // ASMJIT_NODOC +}; + +// ============================================================================ +// [AsmJit::Registers - no_reg] +// ============================================================================ + +//! @brief No register, can be used only in @c Mem operand. +ASMJIT_VAR const GPReg no_reg; + +// ============================================================================ +// [AsmJit::Registers - 8-bit] +// ============================================================================ + +//! @brief 8-bit General purpose register. +ASMJIT_VAR const GPReg al; +//! @brief 8-bit General purpose register. +ASMJIT_VAR const GPReg cl; +//! @brief 8-bit General purpose register. +ASMJIT_VAR const GPReg dl; +//! @brief 8-bit General purpose register. +ASMJIT_VAR const GPReg bl; + +#if defined(ASMJIT_X64) +//! @brief 8-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg spl; +//! @brief 8-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg bpl; +//! @brief 8-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg sil; +//! 
@brief 8-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg dil; + +//! @brief 8-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r8b; +//! @brief 8-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r9b; +//! @brief 8-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r10b; +//! @brief 8-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r11b; +//! @brief 8-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r12b; +//! @brief 8-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r13b; +//! @brief 8-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r14b; +//! @brief 8-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r15b; +#endif // ASMJIT_X64 + +//! @brief 8-bit General purpose register. +ASMJIT_VAR const GPReg ah; +//! @brief 8-bit General purpose register. +ASMJIT_VAR const GPReg ch; +//! @brief 8-bit General purpose register. +ASMJIT_VAR const GPReg dh; +//! @brief 8-bit General purpose register. +ASMJIT_VAR const GPReg bh; + +// ============================================================================ +// [AsmJit::Registers - 16-bit] +// ============================================================================ + +//! @brief 16-bit General purpose register. +ASMJIT_VAR const GPReg ax; +//! @brief 16-bit General purpose register. +ASMJIT_VAR const GPReg cx; +//! @brief 16-bit General purpose register. +ASMJIT_VAR const GPReg dx; +//! @brief 16-bit General purpose register. +ASMJIT_VAR const GPReg bx; +//! @brief 16-bit General purpose register. +ASMJIT_VAR const GPReg sp; +//! @brief 16-bit General purpose register. +ASMJIT_VAR const GPReg bp; +//! @brief 16-bit General purpose register. +ASMJIT_VAR const GPReg si; +//! @brief 16-bit General purpose register. +ASMJIT_VAR const GPReg di; + +#if defined(ASMJIT_X64) +//! 
@brief 16-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r8w; +//! @brief 16-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r9w; +//! @brief 16-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r10w; +//! @brief 16-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r11w; +//! @brief 16-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r12w; +//! @brief 16-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r13w; +//! @brief 16-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r14w; +//! @brief 16-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r15w; +#endif // ASMJIT_X64 + +// ============================================================================ +// [AsmJit::Registers - 32-bit] +// ============================================================================ + +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg eax; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg ecx; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg edx; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg ebx; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg esp; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg ebp; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg esi; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg edi; + +#if defined(ASMJIT_X64) +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg r8d; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg r9d; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg r10d; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg r11d; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg r12d; +//! 
@brief 32-bit General purpose register. +ASMJIT_VAR const GPReg r13d; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg r14d; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg r15d; +#endif // ASMJIT_X64 + +// ============================================================================ +// [AsmJit::Registers - 64-bit] +// ============================================================================ + +#if defined(ASMJIT_X64) +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg rax; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg rcx; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg rdx; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg rbx; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg rsp; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg rbp; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg rsi; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg rdi; + +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r8; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r9; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r10; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r11; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r12; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r13; +//! @brief 64-bit General purpose register (64-bit mode only). +ASMJIT_VAR const GPReg r14; +//! @brief 64-bit General purpose register (64-bit mode only). 
+ASMJIT_VAR const GPReg r15; +#endif // ASMJIT_X64 + +// ============================================================================ +// [AsmJit::Registers - Native (AsmJit extension)] +// ============================================================================ + +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg nax; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg ncx; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg ndx; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg nbx; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg nsp; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg nbp; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg nsi; +//! @brief 32-bit General purpose register. +ASMJIT_VAR const GPReg ndi; + +// ============================================================================ +// [AsmJit::Registers - MM] +// ============================================================================ + +//! @brief 64-bit MM register. +ASMJIT_VAR const MMReg mm0; +//! @brief 64-bit MM register. +ASMJIT_VAR const MMReg mm1; +//! @brief 64-bit MM register. +ASMJIT_VAR const MMReg mm2; +//! @brief 64-bit MM register. +ASMJIT_VAR const MMReg mm3; +//! @brief 64-bit MM register. +ASMJIT_VAR const MMReg mm4; +//! @brief 64-bit MM register. +ASMJIT_VAR const MMReg mm5; +//! @brief 64-bit MM register. +ASMJIT_VAR const MMReg mm6; +//! @brief 64-bit MM register. +ASMJIT_VAR const MMReg mm7; + +// ============================================================================ +// [AsmJit::Registers - XMM] +// ============================================================================ + +//! @brief 128-bit XMM register. +ASMJIT_VAR const XMMReg xmm0; +//! @brief 128-bit XMM register. +ASMJIT_VAR const XMMReg xmm1; +//! @brief 128-bit XMM register. +ASMJIT_VAR const XMMReg xmm2; +//! @brief 128-bit XMM register. 
+ASMJIT_VAR const XMMReg xmm3; +//! @brief 128-bit XMM register. +ASMJIT_VAR const XMMReg xmm4; +//! @brief 128-bit XMM register. +ASMJIT_VAR const XMMReg xmm5; +//! @brief 128-bit XMM register. +ASMJIT_VAR const XMMReg xmm6; +//! @brief 128-bit XMM register. +ASMJIT_VAR const XMMReg xmm7; + +#if defined(ASMJIT_X64) +//! @brief 128-bit XMM register (64-bit mode only). +ASMJIT_VAR const XMMReg xmm8; +//! @brief 128-bit XMM register (64-bit mode only). +ASMJIT_VAR const XMMReg xmm9; +//! @brief 128-bit XMM register (64-bit mode only). +ASMJIT_VAR const XMMReg xmm10; +//! @brief 128-bit XMM register (64-bit mode only). +ASMJIT_VAR const XMMReg xmm11; +//! @brief 128-bit XMM register (64-bit mode only). +ASMJIT_VAR const XMMReg xmm12; +//! @brief 128-bit XMM register (64-bit mode only). +ASMJIT_VAR const XMMReg xmm13; +//! @brief 128-bit XMM register (64-bit mode only). +ASMJIT_VAR const XMMReg xmm14; +//! @brief 128-bit XMM register (64-bit mode only). +ASMJIT_VAR const XMMReg xmm15; +#endif // ASMJIT_X64 + +// ============================================================================ +// [AsmJit::Registers - Register From Index] +// ============================================================================ + +//! @brief Get general purpose register of byte size. +static inline GPReg gpb_lo(uint32_t index) ASMJIT_NOTHROW +{ return GPReg(_Initialize(), static_cast<uint32_t>(index | REG_TYPE_GPB_LO)); } + +//! @brief Get general purpose register of byte size. +static inline GPReg gpb_hi(uint32_t index) ASMJIT_NOTHROW +{ return GPReg(_Initialize(), static_cast<uint32_t>(index | REG_TYPE_GPB_HI)); } + +//! @brief Get general purpose register of word size. +static inline GPReg gpw(uint32_t index) ASMJIT_NOTHROW +{ return GPReg(_Initialize(), static_cast<uint32_t>(index | REG_TYPE_GPW)); } + +//! @brief Get general purpose register of dword size. 
+static inline GPReg gpd(uint32_t index) ASMJIT_NOTHROW +{ return GPReg(_Initialize(), static_cast<uint32_t>(index | REG_TYPE_GPD)); } + +#if defined(ASMJIT_X64) +//! @brief Get general purpose register of qword size (64-bit only). +static inline GPReg gpq(uint32_t index) ASMJIT_NOTHROW +{ return GPReg(_Initialize(), static_cast<uint32_t>(index | REG_TYPE_GPQ)); } +#endif + +//! @brief Get general purpose dword/qword register (depending to architecture). +static inline GPReg gpn(uint32_t index) ASMJIT_NOTHROW +{ return GPReg(_Initialize(), static_cast<uint32_t>(index | REG_TYPE_GPN)); } + +//! @brief Get MMX (MM) register . +static inline MMReg mm(uint32_t index) ASMJIT_NOTHROW +{ return MMReg(_Initialize(), static_cast<uint32_t>(index | REG_TYPE_MM)); } + +//! @brief Get SSE (XMM) register. +static inline XMMReg xmm(uint32_t index) ASMJIT_NOTHROW +{ return XMMReg(_Initialize(), static_cast<uint32_t>(index | REG_TYPE_XMM)); } + +//! @brief Get x87 register with index @a i. +static inline X87Reg st(uint32_t i) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(i < 8); + return X87Reg(_Initialize(), static_cast<uint32_t>(i)); +} + +// ============================================================================ +// [AsmJit::Imm] +// ============================================================================ + +//! @brief Immediate operand. +//! +//! Immediate operand is part of instruction (it's inlined after it). +//! +//! To create immediate operand, use @c imm() and @c uimm() constructors +//! or constructors provided by @c Immediate class itself. +struct ASMJIT_HIDDEN Imm : public Operand +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create a new immediate value (initial value is 0). 
+ Imm() ASMJIT_NOTHROW : + Operand(_DontInitialize()) + { + _imm.op = OPERAND_IMM; + _imm.size = 0; + _imm.isUnsigned = false; + _imm.reserved = 0; + + _imm.id = INVALID_VALUE; + _imm.value = 0; + } + + //! @brief Create a new signed immediate value, assigning the value to @a i. + Imm(sysint_t i) ASMJIT_NOTHROW : + Operand(_DontInitialize()) + { + _imm.op = OPERAND_IMM; + _imm.size = 0; + _imm.isUnsigned = false; + _imm.reserved = 0; + + _imm.id = INVALID_VALUE; + _imm.value = i; + } + + //! @brief Create a new signed or unsigned immediate value, assigning the value to @a i. + Imm(sysint_t i, bool isUnsigned) ASMJIT_NOTHROW : + Operand(_DontInitialize()) + { + _imm.op = OPERAND_IMM; + _imm.size = 0; + _imm.isUnsigned = isUnsigned; + _imm.reserved = 0; + + _imm.id = INVALID_VALUE; + _imm.value = i; + } + + //! @brief Create a new immediate value from @a other. + inline Imm(const Imm& other) ASMJIT_NOTHROW : + Operand(other) {} + + // -------------------------------------------------------------------------- + // [Immediate Specific] + // -------------------------------------------------------------------------- + + //! @brief Get whether an immediate is unsigned value. + inline bool isUnsigned() const ASMJIT_NOTHROW { return _imm.isUnsigned != 0; } + + //! @brief Get signed immediate value. + inline sysint_t getValue() const ASMJIT_NOTHROW { return _imm.value; } + + //! @brief Get unsigned immediate value. + inline sysuint_t getUValue() const ASMJIT_NOTHROW { return (sysuint_t)_imm.value; } + + //! @brief Set immediate value as signed type to @a val. + inline void setValue(sysint_t val, bool isUnsigned = false) ASMJIT_NOTHROW + { + _imm.value = val; + _imm.isUnsigned = isUnsigned; + } + + //! @brief Set immediate value as unsigned type to @a val. 
+ inline void setUValue(sysuint_t val) ASMJIT_NOTHROW + { + _imm.value = (sysint_t)val; + _imm.isUnsigned = true; + } + + // -------------------------------------------------------------------------- + // [Overloaded Operators] + // -------------------------------------------------------------------------- + + //! @brief Assign a signed value @a val to the immediate operand. + inline Imm& operator=(sysint_t val) ASMJIT_NOTHROW + { setValue(val); return *this; } + + //! @brief Assign @a other to the immediate operand. + inline Imm& operator=(const Imm& other) ASMJIT_NOTHROW + { _copy(other); return *this; } +}; + +//! @brief Create signed immediate value operand. +ASMJIT_API Imm imm(sysint_t i) ASMJIT_NOTHROW; + +//! @brief Create unsigned immediate value operand. +ASMJIT_API Imm uimm(sysuint_t i) ASMJIT_NOTHROW; + +// ============================================================================ +// [AsmJit::Label] +// ============================================================================ + +//! @brief Label (jump target or data location). +//! +//! Label represents locations typically used as jump targets, but may be also +//! used as position where are stored constants or static variables. If you +//! want to use @c Label you need first to associate it with @c Assembler or +//! @c Compiler instance. To create new label use @c Assembler::newLabel() or +//! @c Compiler::newLabel(). +//! +//! Example of using labels: +//! +//! @code +//! // Create Assembler or Compiler instance. +//! Assembler a; +//! +//! // Create Label instance. +//! Label L_1(a); +//! +//! // ... your code ... +//! +//! // Using label, see @c AsmJit::Assembler or @c AsmJit::Compiler. +//! a.jump(L_1); +//! +//! // ... your code ... +//! +//! // Bind label to current position, see @c AsmJit::Assembler::bind() or +//! // @c AsmJit::Compiler::bind(). +//! a.bind(L_1); +//! 
@endcode
struct ASMJIT_HIDDEN Label : public Operand
{
  // --------------------------------------------------------------------------
  // [Construction / Destruction]
  // --------------------------------------------------------------------------

  //! @brief Create a label that is not yet associated with any code generator.
  //!
  //! The id is @c INVALID_VALUE until the label is created through
  //! @c Assembler::newLabel() or @c Compiler::newLabel().
  inline Label() ASMJIT_NOTHROW :
    Operand(_DontInitialize())
  {
    _lbl.op = OPERAND_LABEL;
    _lbl.size = 0;
    _lbl.id = INVALID_VALUE;
  }

  //! @brief Create a second reference to the label @a other (labels are
  //! value types; copying shares the id, it does not create a new label).
  inline Label(const Label& other) ASMJIT_NOTHROW :
    Operand(other)
  {
  }

  //! @brief Destroy the label (no-op, a label owns no resources).
  inline ~Label() ASMJIT_NOTHROW
  {
  }

  // --------------------------------------------------------------------------
  // [Overloaded Operators]
  // --------------------------------------------------------------------------

#if !defined(ASMJIT_NODOC)
  //! @brief Make this label reference the same target as @a other.
  inline Label& operator=(const Label& other)
  { _copy(other); return *this; }

  //! @brief Labels compare by id only.
  inline bool operator==(const Label& other) const ASMJIT_NOTHROW { return _base.id == other._base.id; }
  //! @brief Labels compare by id only.
  inline bool operator!=(const Label& other) const ASMJIT_NOTHROW { return _base.id != other._base.id; }
#endif // ASMJIT_NODOC
};

// ============================================================================
// [AsmJit::Mem]
// ============================================================================

//! @brief Memory operand.
+struct ASMJIT_HIDDEN Mem : public Operand +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + inline Mem() ASMJIT_NOTHROW : + Operand(_DontInitialize()) + { + _mem.op = OPERAND_MEM; + _mem.size = 0; + _mem.type = OPERAND_MEM_NATIVE; + _mem.segmentPrefix = SEGMENT_NONE; + + _mem.id = INVALID_VALUE; + + _mem.base = INVALID_VALUE; + _mem.index = INVALID_VALUE; + _mem.shift = 0; + + _mem.target = NULL; + _mem.displacement = 0; + } + + inline Mem(const Label& label, sysint_t displacement, uint32_t size = 0) ASMJIT_NOTHROW : + Operand(_DontInitialize()) + { + _mem.op = OPERAND_MEM; + _mem.size = (uint8_t)size; + _mem.type = OPERAND_MEM_LABEL; + _mem.segmentPrefix = SEGMENT_NONE; + + _mem.id = INVALID_VALUE; + + _mem.base = reinterpret_cast<const Operand&>(label)._base.id; + _mem.index = INVALID_VALUE; + _mem.shift = 0; + + _mem.target = NULL; + _mem.displacement = displacement; + } + + inline Mem(const GPReg& base, sysint_t displacement, uint32_t size = 0) ASMJIT_NOTHROW : + Operand(_DontInitialize()) + { + _mem.op = OPERAND_MEM; + _mem.size = (uint8_t)size; + _mem.type = OPERAND_MEM_NATIVE; + _mem.segmentPrefix = SEGMENT_NONE; + + _mem.id = INVALID_VALUE; + + _mem.base = base.getRegCode() & REG_INDEX_MASK; + _mem.index = INVALID_VALUE; + _mem.shift = 0; + + _mem.target = NULL; + _mem.displacement = displacement; + } + + inline Mem(const GPVar& base, sysint_t displacement, uint32_t size = 0) ASMJIT_NOTHROW : + Operand(_DontInitialize()) + { + _mem.op = OPERAND_MEM; + _mem.size = (uint8_t)size; + _mem.type = OPERAND_MEM_NATIVE; + _mem.segmentPrefix = SEGMENT_NONE; + + _mem.id = INVALID_VALUE; + + _mem.base = reinterpret_cast<const Operand&>(base).getId(); + _mem.index = INVALID_VALUE; + _mem.shift = 0; + + _mem.target = NULL; + _mem.displacement = displacement; + } + + inline Mem(const GPReg& base, const GPReg& index, 
uint32_t shift, sysint_t displacement, uint32_t size = 0) ASMJIT_NOTHROW : + Operand(_DontInitialize()) + { + ASMJIT_ASSERT(shift <= 3); + + _mem.op = OPERAND_MEM; + _mem.size = (uint8_t)size; + _mem.type = OPERAND_MEM_NATIVE; + _mem.segmentPrefix = SEGMENT_NONE; + + _mem.id = INVALID_VALUE; + + _mem.base = base.getRegIndex(); + _mem.index = index.getRegIndex(); + _mem.shift = (uint8_t)shift; + + _mem.target = NULL; + _mem.displacement = displacement; + } + + inline Mem(const GPVar& base, const GPVar& index, uint32_t shift, sysint_t displacement, uint32_t size = 0) ASMJIT_NOTHROW : + Operand(_DontInitialize()) + { + ASMJIT_ASSERT(shift <= 3); + + _mem.op = OPERAND_MEM; + _mem.size = (uint8_t)size; + _mem.type = OPERAND_MEM_NATIVE; + _mem.segmentPrefix = SEGMENT_NONE; + + _mem.id = INVALID_VALUE; + + _mem.base = reinterpret_cast<const Operand&>(base).getId(); + _mem.index = reinterpret_cast<const Operand&>(index).getId(); + _mem.shift = (uint8_t)shift; + + _mem.target = NULL; + _mem.displacement = displacement; + } + + inline Mem(const Mem& other) ASMJIT_NOTHROW : + Operand(other) + { + } + + inline Mem(const _DontInitialize& dontInitialize) ASMJIT_NOTHROW : + Operand(dontInitialize) + { + } + + // -------------------------------------------------------------------------- + // [Mem Specific] + // -------------------------------------------------------------------------- + + //! @brief Get type of memory operand, see @c OPERAND_MEM_TYPE enum. + inline uint32_t getMemType() const ASMJIT_NOTHROW + { return _mem.type; } + + //! @brief Get memory operand segment prefix, see @c SEGMENT_PREFIX enum. + inline uint32_t getSegmentPrefix() const ASMJIT_NOTHROW + { return _mem.segmentPrefix; } + + //! @brief Get whether the memory operand has base register. + inline bool hasBase() const ASMJIT_NOTHROW + { return _mem.base != INVALID_VALUE; } + + //! @brief Get whether the memory operand has index. 
+ inline bool hasIndex() const ASMJIT_NOTHROW + { return _mem.index != INVALID_VALUE; } + + //! @brief Get whether the memory operand has shift used. + inline bool hasShift() const ASMJIT_NOTHROW + { return _mem.shift != 0; } + + //! @brief Get memory operand base register or @c INVALID_VALUE. + inline uint32_t getBase() const ASMJIT_NOTHROW + { return _mem.base; } + + //! @brief Get memory operand index register or @c INVALID_VALUE. + inline uint32_t getIndex() const ASMJIT_NOTHROW + { return _mem.index; } + + //! @brief Get memory operand index scale (0, 1, 2 or 3). + inline uint32_t getShift() const ASMJIT_NOTHROW + { return _mem.shift; } + + //! @brief Get absolute target address. + //! + //! @note You should always check if operand contains address by @c getMemType(). + inline void* getTarget() const ASMJIT_NOTHROW + { return _mem.target; } + + //! @brief Set memory operand size. + inline void setSize(uint32_t size) ASMJIT_NOTHROW + { _mem.size = size; } + + //! @brief Set absolute target address. + inline void setTarget(void* target) ASMJIT_NOTHROW + { _mem.target = target; } + + //! @brief Get memory operand relative displacement. + inline sysint_t getDisplacement() const ASMJIT_NOTHROW + { return _mem.displacement; } + + //! @brief Set memory operand relative displacement. + inline void setDisplacement(sysint_t displacement) ASMJIT_NOTHROW + { _mem.displacement = displacement; } + + //! @brief Adjust memory operand relative displacement by @a displacement. + inline void adjust(sysint_t displacement) ASMJIT_NOTHROW + { + _mem.displacement += displacement; + } + + //! @brief Return new memory operand adjusted by @a displacement. 
+ inline Mem adjusted(sysint_t displacement) ASMJIT_NOTHROW + { + Mem result(*this); + result.adjust(displacement); + return result; + } + + // -------------------------------------------------------------------------- + // [Overloaded Operators] + // -------------------------------------------------------------------------- + +#if !defined(ASMJIT_NODOC) + inline Mem& operator=(const Mem& other) ASMJIT_NOTHROW { _copy(other); return *this; } + + inline bool operator==(const Mem& other) const ASMJIT_NOTHROW + { + return + _mem.size == other._mem.size && + _mem.type == other._mem.type && + _mem.segmentPrefix == other._mem.segmentPrefix && + _mem.base == other._mem.base && + _mem.index == other._mem.index && + _mem.shift == other._mem.shift && + _mem.target == other._mem.target && + _mem.displacement == other._mem.displacement; + } + + inline bool operator!=(const Mem& other) const ASMJIT_NOTHROW { return *this == other; } +#endif // ASMJIT_NODOC +}; + +// ============================================================================ +// [AsmJit::BaseVar] +// ============================================================================ + +//! @internal +ASMJIT_API Mem _baseVarMem(const BaseVar& var, uint32_t ptrSize) ASMJIT_NOTHROW; + +//! @internal +ASMJIT_API Mem _baseVarMem(const BaseVar& var, uint32_t ptrSize, sysint_t disp) ASMJIT_NOTHROW; + +//! @internal +ASMJIT_API Mem _baseVarMem(const BaseVar& var, uint32_t ptrSize, const GPVar& index, uint32_t shift, sysint_t disp) ASMJIT_NOTHROW; + +//! @brief Base class for all variables. 
+struct ASMJIT_HIDDEN BaseVar : public Operand +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + +#if !defined(ASMJIT_NODOC) + inline BaseVar(const _DontInitialize& dontInitialize) ASMJIT_NOTHROW : + Operand(dontInitialize) + { + } +#endif // ASMJIT_NODOC + + inline BaseVar() ASMJIT_NOTHROW : + Operand(_DontInitialize()) + { + _var.op = OPERAND_VAR; + _var.size = 0; + _var.registerCode = INVALID_VALUE; + _var.variableType = INVALID_VALUE; + _var.id = INVALID_VALUE; + } + + inline BaseVar(const BaseVar& other) ASMJIT_NOTHROW : + Operand(other) + { + } + + // -------------------------------------------------------------------------- + // [Type] + // -------------------------------------------------------------------------- + + inline uint32_t getVariableType() const ASMJIT_NOTHROW + { return _var.variableType; } + + inline bool isGPVar() const ASMJIT_NOTHROW + { return _var.variableType <= VARIABLE_TYPE_GPQ; } + + inline bool isX87Var() const ASMJIT_NOTHROW + { return _var.variableType >= VARIABLE_TYPE_X87 && _var.variableType <= VARIABLE_TYPE_X87_1D; } + + inline bool isMMVar() const ASMJIT_NOTHROW + { return _var.variableType == VARIABLE_TYPE_MM; } + + inline bool isXMMVar() const ASMJIT_NOTHROW + { return _var.variableType >= VARIABLE_TYPE_XMM && _var.variableType <= VARIABLE_TYPE_XMM_2D; } + + // -------------------------------------------------------------------------- + // [Memory Cast] + // -------------------------------------------------------------------------- + + //! @brief Cast this variable to memory operand. + //! + //! @note Size of operand depends to native variable type, you can use other + //! variants if you want specific one. + inline Mem m() const ASMJIT_NOTHROW + { return _baseVarMem(*this, INVALID_VALUE); } + + //! @overload. 
+ inline Mem m(sysint_t disp) const ASMJIT_NOTHROW + { return _baseVarMem(*this, INVALID_VALUE, disp); } + + //! @overload. + inline Mem m(const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) const ASMJIT_NOTHROW + { return _baseVarMem(*this, INVALID_VALUE, index, shift, disp); } + + //! @brief Cast this variable to 8-bit memory operand. + inline Mem m8() const ASMJIT_NOTHROW + { return _baseVarMem(*this, 1); } + + //! @overload. + inline Mem m8(sysint_t disp) const ASMJIT_NOTHROW + { return _baseVarMem(*this, 1, disp); } + + //! @overload. + inline Mem m8(const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) const ASMJIT_NOTHROW + { return _baseVarMem(*this, 1, index, shift, disp); } + + //! @brief Cast this variable to 16-bit memory operand. + inline Mem m16() const ASMJIT_NOTHROW + { return _baseVarMem(*this, 2); } + + //! @overload. + inline Mem m16(sysint_t disp) const ASMJIT_NOTHROW + { return _baseVarMem(*this, 2, disp); } + + //! @overload. + inline Mem m16(const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) const ASMJIT_NOTHROW + { return _baseVarMem(*this, 2, index, shift, disp); } + + //! @brief Cast this variable to 32-bit memory operand. + inline Mem m32() const ASMJIT_NOTHROW + { return _baseVarMem(*this, 4); } + + //! @overload. + inline Mem m32(sysint_t disp) const ASMJIT_NOTHROW + { return _baseVarMem(*this, 4, disp); } + + //! @overload. + inline Mem m32(const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) const ASMJIT_NOTHROW + { return _baseVarMem(*this, 4, index, shift, disp); } + + //! @brief Cast this variable to 64-bit memory operand. + inline Mem m64() const ASMJIT_NOTHROW + { return _baseVarMem(*this, 8); } + + //! @overload. + inline Mem m64(sysint_t disp) const ASMJIT_NOTHROW + { return _baseVarMem(*this, 8, disp); } + + //! @overload. + inline Mem m64(const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) const ASMJIT_NOTHROW + { return _baseVarMem(*this, 8, index, shift, disp); } + + //! 
@brief Cast this variable to 80-bit memory operand (long double). + inline Mem m80() const ASMJIT_NOTHROW + { return _baseVarMem(*this, 10); } + + //! @overload. + inline Mem m80(sysint_t disp) const ASMJIT_NOTHROW + { return _baseVarMem(*this, 10, disp); } + + //! @overload. + inline Mem m80(const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) const ASMJIT_NOTHROW + { return _baseVarMem(*this, 10, index, shift, disp); } + + //! @brief Cast this variable to 128-bit memory operand. + inline Mem m128() const ASMJIT_NOTHROW + { return _baseVarMem(*this, 16); } + + //! @overload. + inline Mem m128(sysint_t disp) const ASMJIT_NOTHROW + { return _baseVarMem(*this, 16, disp); } + + //! @overload. + inline Mem m128(const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) const ASMJIT_NOTHROW + { return _baseVarMem(*this, 16, index, shift, disp); } + + // -------------------------------------------------------------------------- + // [Overloaded Operators] + // -------------------------------------------------------------------------- + +#if !defined(ASMJIT_NODOC) + inline BaseVar& operator=(const BaseVar& other) ASMJIT_NOTHROW + { _copy(other); return *this; } + + inline bool operator==(const BaseVar& other) const ASMJIT_NOTHROW { return _base.id == other._base.id && _var.registerCode == other._var.registerCode; } + inline bool operator!=(const BaseVar& other) const ASMJIT_NOTHROW { return _base.id != other._base.id || _var.registerCode != other._var.registerCode; } +#endif // ASMJIT_NODOC + + // -------------------------------------------------------------------------- + // [Private] + // -------------------------------------------------------------------------- + +protected: + inline BaseVar(const BaseVar& other, uint32_t registerCode, uint32_t size) ASMJIT_NOTHROW : + Operand(_DontInitialize()) + { + _var.op = OPERAND_VAR; + _var.size = (uint8_t)size; + _var.id = other._base.id; + _var.registerCode = registerCode; + _var.variableType = other._var.variableType; + 
} +}; + +// ============================================================================ +// [AsmJit::X87Var] +// ============================================================================ + +//! @brief X87 Variable operand. +struct ASMJIT_HIDDEN X87Var : public BaseVar +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + inline X87Var(const _DontInitialize& dontInitialize) ASMJIT_NOTHROW : + BaseVar(dontInitialize) + { + } + + inline X87Var() ASMJIT_NOTHROW : + BaseVar(_DontInitialize()) + { + _var.op = OPERAND_VAR; + _var.size = 12; + _var.id = INVALID_VALUE; + + _var.registerCode = REG_TYPE_X87; + _var.variableType = VARIABLE_TYPE_X87; + } + + inline X87Var(const X87Var& other) ASMJIT_NOTHROW : + BaseVar(other) {} + + // -------------------------------------------------------------------------- + // [Overloaded Operators] + // -------------------------------------------------------------------------- + +#if !defined(ASMJIT_NODOC) + inline X87Var& operator=(const X87Var& other) ASMJIT_NOTHROW + { _copy(other); return *this; } + + inline bool operator==(const X87Var& other) const ASMJIT_NOTHROW { return _base.id == other._base.id; } + inline bool operator!=(const X87Var& other) const ASMJIT_NOTHROW { return _base.id != other._base.id; } +#endif // ASMJIT_NODOC +}; + +// ============================================================================ +// [AsmJit::GPVar] +// ============================================================================ + +//! @brief GP variable operand. +struct ASMJIT_HIDDEN GPVar : public BaseVar +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create new uninitialized @c GPVar instance (internal constructor). 
+ inline GPVar(const _DontInitialize& dontInitialize) ASMJIT_NOTHROW : + BaseVar(dontInitialize) + { + } + + //! @brief Create new uninitialized @c GPVar instance. + inline GPVar() ASMJIT_NOTHROW : + BaseVar(_DontInitialize()) + { + _var.op = OPERAND_VAR; + _var.size = sizeof(sysint_t); + _var.id = INVALID_VALUE; + + _var.registerCode = REG_TYPE_GPN; + _var.variableType = VARIABLE_TYPE_GPN; + } + + //! @brief Create new @c GPVar instance using @a other. + //! + //! Note this will not create a different variable, use @c Compiler::newGP() + //! if you want to do so. This is only copy-constructor that allows to store + //! the same variable in different places. + inline GPVar(const GPVar& other) ASMJIT_NOTHROW : + BaseVar(other) {} + + // -------------------------------------------------------------------------- + // [GPVar Specific] + // -------------------------------------------------------------------------- + + //! @brief Get whether this variable is general purpose BYTE register. + inline bool isGPB() const ASMJIT_NOTHROW { return (_var.registerCode & REG_TYPE_MASK) <= REG_TYPE_GPB_HI; } + //! @brief Get whether this variable is general purpose BYTE.LO register. + inline bool isGPBLo() const ASMJIT_NOTHROW { return (_var.registerCode & REG_TYPE_MASK) == REG_TYPE_GPB_LO; } + //! @brief Get whether this variable is general purpose BYTE.HI register. + inline bool isGPBHi() const ASMJIT_NOTHROW { return (_var.registerCode & REG_TYPE_MASK) == REG_TYPE_GPB_HI; } + + //! @brief Get whether this variable is general purpose WORD register. + inline bool isGPW() const ASMJIT_NOTHROW { return (_var.registerCode & REG_TYPE_MASK) == REG_TYPE_GPW; } + //! @brief Get whether this variable is general purpose DWORD register. + inline bool isGPD() const ASMJIT_NOTHROW { return (_var.registerCode & REG_TYPE_MASK) == REG_TYPE_GPD; } + //! @brief Get whether this variable is general purpose QWORD (only 64-bit) register. 
+ inline bool isGPQ() const ASMJIT_NOTHROW { return (_var.registerCode & REG_TYPE_MASK) == REG_TYPE_GPQ; } + + // -------------------------------------------------------------------------- + // [GPVar Cast] + // -------------------------------------------------------------------------- + + //! @brief Cast this variable to 8-bit (LO) part of variable + inline GPVar r8() const { return GPVar(*this, REG_TYPE_GPB_LO, 1); } + //! @brief Cast this variable to 8-bit (LO) part of variable + inline GPVar r8Lo() const { return GPVar(*this, REG_TYPE_GPB_LO, 1); } + //! @brief Cast this variable to 8-bit (HI) part of variable + inline GPVar r8Hi() const { return GPVar(*this, REG_TYPE_GPB_HI, 1); } + + //! @brief Cast this variable to 16-bit part of variable + inline GPVar r16() const { return GPVar(*this, REG_TYPE_GPW, 2); } + //! @brief Cast this variable to 32-bit part of variable + inline GPVar r32() const { return GPVar(*this, REG_TYPE_GPD, 4); } +#if defined(ASMJIT_X64) + //! @brief Cast this variable to 64-bit part of variable + inline GPVar r64() const { return GPVar(*this, REG_TYPE_GPQ, 8); } +#endif // ASMJIT_X64 + + // -------------------------------------------------------------------------- + // [Overloaded Operators] + // -------------------------------------------------------------------------- + +#if !defined(ASMJIT_NODOC) + inline GPVar& operator=(const GPVar& other) ASMJIT_NOTHROW + { _copy(other); return *this; } + + inline bool operator==(const GPVar& other) const ASMJIT_NOTHROW { return _base.id == other._base.id && _var.registerCode == other._var.registerCode; } + inline bool operator!=(const GPVar& other) const ASMJIT_NOTHROW { return _base.id != other._base.id || _var.registerCode != other._var.registerCode; } +#endif // ASMJIT_NODOC + + // -------------------------------------------------------------------------- + // [Private] + // -------------------------------------------------------------------------- + +protected: + inline GPVar(const GPVar& 
other, uint32_t registerCode, uint32_t size) ASMJIT_NOTHROW : + BaseVar(other, registerCode, size) + { + } +}; + +// ============================================================================ +// [AsmJit::MMVar] +// ============================================================================ + +//! @brief MM variable operand. +struct ASMJIT_HIDDEN MMVar : public BaseVar +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create new uninitialized @c MMVar instance (internal constructor). + inline MMVar(const _DontInitialize& dontInitialize) ASMJIT_NOTHROW : + BaseVar(dontInitialize) + { + } + + //! @brief Create new uninitialized @c MMVar instance. + inline MMVar() ASMJIT_NOTHROW : + BaseVar(_DontInitialize()) + { + _var.op = OPERAND_VAR; + _var.size = 8; + _var.id = INVALID_VALUE; + + _var.registerCode = REG_TYPE_MM; + _var.variableType = VARIABLE_TYPE_MM; + } + + //! @brief Create new @c MMVar instance using @a other. + //! + //! Note this will not create a different variable, use @c Compiler::newMM() + //! if you want to do so. This is only copy-constructor that allows to store + //! the same variable in different places. 
+ inline MMVar(const MMVar& other) ASMJIT_NOTHROW : + BaseVar(other) {} + + // -------------------------------------------------------------------------- + // [MMVar Cast] + // -------------------------------------------------------------------------- + + // -------------------------------------------------------------------------- + // [Overloaded Operators] + // -------------------------------------------------------------------------- + +#if !defined(ASMJIT_NODOC) + inline MMVar& operator=(const MMVar& other) ASMJIT_NOTHROW + { _copy(other); return *this; } + + inline bool operator==(const MMVar& other) const ASMJIT_NOTHROW { return _base.id == other._base.id; } + inline bool operator!=(const MMVar& other) const ASMJIT_NOTHROW { return _base.id != other._base.id; } +#endif // ASMJIT_NODOC +}; + +// ============================================================================ +// [AsmJit::XMMVar] +// ============================================================================ + +//! @brief XMM Variable operand. 
+struct ASMJIT_HIDDEN XMMVar : public BaseVar +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + inline XMMVar(const _DontInitialize& dontInitialize) ASMJIT_NOTHROW : + BaseVar(dontInitialize) + { + } + + inline XMMVar() ASMJIT_NOTHROW : + BaseVar(_DontInitialize()) + { + _var.op = OPERAND_VAR; + _var.size = 16; + _var.id = INVALID_VALUE; + + _var.registerCode = REG_TYPE_XMM; + _var.variableType = VARIABLE_TYPE_XMM; + } + + inline XMMVar(const XMMVar& other) ASMJIT_NOTHROW : + BaseVar(other) {} + + // -------------------------------------------------------------------------- + // [XMMVar Access] + // -------------------------------------------------------------------------- + + // -------------------------------------------------------------------------- + // [Overloaded Operators] + // -------------------------------------------------------------------------- + +#if !defined(ASMJIT_NODOC) + inline XMMVar& operator=(const XMMVar& other) ASMJIT_NOTHROW + { _copy(other); return *this; } + + inline bool operator==(const XMMVar& other) const ASMJIT_NOTHROW { return _base.id == other._base.id; } + inline bool operator!=(const XMMVar& other) const ASMJIT_NOTHROW { return _base.id != other._base.id; } +#endif // ASMJIT_NODOC +}; + +// ============================================================================ +// [AsmJit::Mem - ptr[displacement]] +// ============================================================================ + +//! @internal +ASMJIT_API Mem _MemPtrBuild(const Label& label, sysint_t disp, uint32_t ptrSize) ASMJIT_NOTHROW; + +//! @internal +ASMJIT_API Mem _MemPtrBuild(const Label& label, const GPReg& index, uint32_t shift, sysint_t disp, uint32_t ptrSize) ASMJIT_NOTHROW; + +//! 
@internal +ASMJIT_API Mem _MemPtrBuild(const Label& label, const GPVar& index, uint32_t shift, sysint_t disp, uint32_t ptrSize) ASMJIT_NOTHROW; + + + +//! @brief Create pointer operand with not specified size. +static inline Mem ptr(const Label& label, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, disp, 0); } + +//! @brief Create byte pointer operand. +static inline Mem byte_ptr(const Label& label, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, disp, SIZE_BYTE); } + +//! @brief Create word (2 Bytes) pointer operand. +static inline Mem word_ptr(const Label& label, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, disp, SIZE_WORD); } + +//! @brief Create dword (4 Bytes) pointer operand. +static inline Mem dword_ptr(const Label& label, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, disp, SIZE_DWORD); } + +//! @brief Create qword (8 Bytes) pointer operand. +static inline Mem qword_ptr(const Label& label, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, disp, SIZE_QWORD); } + +//! @brief Create tword (10 Bytes) pointer operand (used for 80-bit floating points). +static inline Mem tword_ptr(const Label& label, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, disp, SIZE_TWORD); } + +//! @brief Create dqword (16 Bytes) pointer operand. +static inline Mem dqword_ptr(const Label& label, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, disp, SIZE_DQWORD); } + +//! @brief Create mmword (8 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for mmx programming. +static inline Mem mmword_ptr(const Label& label, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, disp, SIZE_QWORD); } + +//! @brief Create xmmword (16 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for sse programming. 
+static inline Mem xmmword_ptr(const Label& label, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, disp, SIZE_DQWORD); } + +//! @brief Create system dependent pointer operand (32-bit or 64-bit). +static inline Mem sysint_ptr(const Label& label, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, disp, sizeof(sysint_t)); } + + + +//! @brief Create pointer operand with not specified size. +static inline Mem ptr(const Label& label, const GPReg& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, 0); } + +//! @brief Create byte pointer operand. +static inline Mem byte_ptr(const Label& label, const GPReg& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_BYTE); } + +//! @brief Create word (2 Bytes) pointer operand. +static inline Mem word_ptr(const Label& label, const GPReg& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_WORD); } + +//! @brief Create dword (4 Bytes) pointer operand. +static inline Mem dword_ptr(const Label& label, const GPReg& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_DWORD); } + +//! @brief Create qword (8 Bytes) pointer operand. +static inline Mem qword_ptr(const Label& label, const GPReg& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_QWORD); } + +//! @brief Create tword (10 Bytes) pointer operand (used for 80-bit floating points). +static inline Mem tword_ptr(const Label& label, const GPReg& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_TWORD); } + +//! @brief Create dqword (16 Bytes) pointer operand. 
+static inline Mem dqword_ptr(const Label& label, const GPReg& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_DQWORD); } + +//! @brief Create mmword (8 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for mmx programming. +static inline Mem mmword_ptr(const Label& label, const GPReg& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_QWORD); } + +//! @brief Create xmmword (16 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for sse programming. +static inline Mem xmmword_ptr(const Label& label, const GPReg& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_DQWORD); } + +//! @brief Create system dependent pointer operand (32-bit or 64-bit). +static inline Mem sysint_ptr(const Label& label, const GPReg& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, sizeof(sysint_t)); } + + + +//! @brief Create pointer operand with not specified size. +static inline Mem ptr(const Label& label, const GPVar& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, 0); } + +//! @brief Create byte pointer operand. +static inline Mem byte_ptr(const Label& label, const GPVar& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_BYTE); } + +//! @brief Create word (2 Bytes) pointer operand. +static inline Mem word_ptr(const Label& label, const GPVar& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_WORD); } + +//! @brief Create dword (4 Bytes) pointer operand. 
+static inline Mem dword_ptr(const Label& label, const GPVar& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_DWORD); } + +//! @brief Create qword (8 Bytes) pointer operand. +static inline Mem qword_ptr(const Label& label, const GPVar& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_QWORD); } + +//! @brief Create tword (10 Bytes) pointer operand (used for 80-bit floating points). +static inline Mem tword_ptr(const Label& label, const GPVar& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_TWORD); } + +//! @brief Create dqword (16 Bytes) pointer operand. +static inline Mem dqword_ptr(const Label& label, const GPVar& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_DQWORD); } + +//! @brief Create mmword (8 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for mmx programming. +static inline Mem mmword_ptr(const Label& label, const GPVar& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_QWORD); } + +//! @brief Create xmmword (16 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for sse programming. +static inline Mem xmmword_ptr(const Label& label, const GPVar& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, SIZE_DQWORD); } + +//! @brief Create system dependent pointer operand (32-bit or 64-bit). 
+static inline Mem sysint_ptr(const Label& label, const GPVar& index, uint32_t shift, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(label, index, shift, disp, sizeof(sysint_t)); } + +// ============================================================================ +// [AsmJit::Mem - Absolute Addressing] +// ============================================================================ + +//! @internal +ASMJIT_API Mem _MemPtrAbs( + void* target, + sysint_t disp, + uint32_t segmentPrefix, uint32_t ptrSize) ASMJIT_NOTHROW; + +//! @internal +ASMJIT_API Mem _MemPtrAbs( + void* target, + const GPReg& index, uint32_t shift, sysint_t disp, + uint32_t segmentPrefix, uint32_t ptrSize) ASMJIT_NOTHROW; + +//! @internal +ASMJIT_API Mem _MemPtrAbs( + void* target, + const GPVar& index, uint32_t shift, sysint_t disp, + uint32_t segmentPrefix, uint32_t ptrSize) ASMJIT_NOTHROW; + + + +//! @brief Create pointer operand with not specified size. +static inline Mem ptr_abs(void* target, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, disp, segmentPrefix, 0); } + +//! @brief Create byte pointer operand. +static inline Mem byte_ptr_abs(void* target, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, disp, segmentPrefix, SIZE_BYTE); } + +//! @brief Create word (2 Bytes) pointer operand. +static inline Mem word_ptr_abs(void* target, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, disp, segmentPrefix, SIZE_WORD); } + +//! @brief Create dword (4 Bytes) pointer operand. +static inline Mem dword_ptr_abs(void* target, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, disp, segmentPrefix, SIZE_DWORD); } + +//! @brief Create qword (8 Bytes) pointer operand. 
+static inline Mem qword_ptr_abs(void* target, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, disp, segmentPrefix, SIZE_QWORD); } + +//! @brief Create tword (10 Bytes) pointer operand (used for 80-bit floating points). +static inline Mem tword_ptr_abs(void* target, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, disp, segmentPrefix, SIZE_TWORD); } + +//! @brief Create dqword (16 Bytes) pointer operand. +static inline Mem dqword_ptr_abs(void* target, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, disp, segmentPrefix, SIZE_DQWORD); } + +//! @brief Create mmword (8 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for mmx programming. +static inline Mem mmword_ptr_abs(void* target, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, disp, segmentPrefix, SIZE_QWORD); } + +//! @brief Create xmmword (16 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for sse programming. +static inline Mem xmmword_ptr_abs(void* target, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, disp, segmentPrefix, SIZE_DQWORD); } + +//! @brief Create system dependent pointer operand (32-bit or 64-bit). +static inline Mem sysint_ptr_abs(void* target, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, disp, segmentPrefix, sizeof(sysint_t)); } + + + +//! @brief Create pointer operand with not specified size. +static inline Mem ptr_abs(void* target, const GPReg& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, 0); } + +//! @brief Create byte pointer operand. 
+static inline Mem byte_ptr_abs(void* target, const GPReg& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_BYTE); } + +//! @brief Create word (2 Bytes) pointer operand. +static inline Mem word_ptr_abs(void* target, const GPReg& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_WORD); } + +//! @brief Create dword (4 Bytes) pointer operand. +static inline Mem dword_ptr_abs(void* target, const GPReg& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_DWORD); } + +//! @brief Create qword (8 Bytes) pointer operand. +static inline Mem qword_ptr_abs(void* target, const GPReg& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_QWORD); } + +//! @brief Create tword (10 Bytes) pointer operand (used for 80-bit floating points). +static inline Mem tword_ptr_abs(void* target, const GPReg& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_TWORD); } + +//! @brief Create dqword (16 Bytes) pointer operand. +static inline Mem dqword_ptr_abs(void* target, const GPReg& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_DQWORD); } + +//! @brief Create mmword (8 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for mmx programming. 
+static inline Mem mmword_ptr_abs(void* target, const GPReg& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_QWORD); } + +//! @brief Create xmmword (16 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for sse programming. +static inline Mem xmmword_ptr_abs(void* target, const GPReg& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_DQWORD); } + +//! @brief Create system dependent pointer operand (32-bit or 64-bit). +static inline Mem sysint_ptr_abs(void* target, const GPReg& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, sizeof(sysint_t)); } + + + +//! @brief Create pointer operand with not specified size. +static inline Mem ptr_abs(void* target, const GPVar& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, 0); } + +//! @brief Create byte pointer operand. +static inline Mem byte_ptr_abs(void* target, const GPVar& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_BYTE); } + +//! @brief Create word (2 Bytes) pointer operand. +static inline Mem word_ptr_abs(void* target, const GPVar& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_WORD); } + +//! @brief Create dword (4 Bytes) pointer operand. 
+static inline Mem dword_ptr_abs(void* target, const GPVar& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_DWORD); } + +//! @brief Create qword (8 Bytes) pointer operand. +static inline Mem qword_ptr_abs(void* target, const GPVar& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_QWORD); } + +//! @brief Create tword (10 Bytes) pointer operand (used for 80-bit floating points). +static inline Mem tword_ptr_abs(void* target, const GPVar& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_TWORD); } + +//! @brief Create dqword (16 Bytes) pointer operand. +static inline Mem dqword_ptr_abs(void* target, const GPVar& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_DQWORD); } + +//! @brief Create mmword (8 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for mmx programming. +static inline Mem mmword_ptr_abs(void* target, const GPVar& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_QWORD); } + +//! @brief Create xmmword (16 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for sse programming. +static inline Mem xmmword_ptr_abs(void* target, const GPVar& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, SIZE_DQWORD); } + +//! @brief Create system dependent pointer operand (32-bit or 64-bit). 
+static inline Mem sysint_ptr_abs(void* target, const GPVar& index, uint32_t shift, sysint_t disp = 0, uint32_t segmentPrefix = SEGMENT_NONE) ASMJIT_NOTHROW +{ return _MemPtrAbs(target, index, shift, disp, segmentPrefix, sizeof(sysint_t)); } + +// ============================================================================ +// [AsmJit::Mem - ptr[base + displacement]] +// ============================================================================ + +//! @internal +ASMJIT_API Mem _MemPtrBuild(const GPReg& base, sysint_t disp, uint32_t ptrSize) ASMJIT_NOTHROW; + +//! @internal +ASMJIT_API Mem _MemPtrBuild(const GPVar& base, sysint_t disp, uint32_t ptrSize) ASMJIT_NOTHROW; + + + +//! @brief Create pointer operand with not specified size. +static inline Mem ptr(const GPReg& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, 0); } + +//! @brief Create byte pointer operand. +static inline Mem byte_ptr(const GPReg& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_BYTE); } + +//! @brief Create word (2 Bytes) pointer operand. +static inline Mem word_ptr(const GPReg& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_WORD); } + +//! @brief Create dword (4 Bytes) pointer operand. +static inline Mem dword_ptr(const GPReg& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_DWORD); } + +//! @brief Create qword (8 Bytes) pointer operand. +static inline Mem qword_ptr(const GPReg& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_QWORD); } + +//! @brief Create tword (10 Bytes) pointer operand (used for 80-bit floating points). +static inline Mem tword_ptr(const GPReg& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_TWORD); } + +//! @brief Create dqword (16 Bytes) pointer operand. 
+static inline Mem dqword_ptr(const GPReg& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_DQWORD); } + +//! @brief Create mmword (8 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for mmx programming. +static inline Mem mmword_ptr(const GPReg& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_QWORD); } + +//! @brief Create xmmword (16 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for sse programming. +static inline Mem xmmword_ptr(const GPReg& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_DQWORD); } + +//! @brief Create system dependent pointer operand (32-bit or 64-bit). +static inline Mem sysint_ptr(const GPReg& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, sizeof(sysint_t)); } + + + +//! @brief Create pointer operand with not specified size. +static inline Mem ptr(const GPVar& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, 0); } + +//! @brief Create byte pointer operand. +static inline Mem byte_ptr(const GPVar& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_BYTE); } + +//! @brief Create word (2 Bytes) pointer operand. +static inline Mem word_ptr(const GPVar& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_WORD); } + +//! @brief Create dword (4 Bytes) pointer operand. +static inline Mem dword_ptr(const GPVar& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_DWORD); } + +//! @brief Create qword (8 Bytes) pointer operand. +static inline Mem qword_ptr(const GPVar& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_QWORD); } + +//! @brief Create tword (10 Bytes) pointer operand (used for 80-bit floating points). 
+static inline Mem tword_ptr(const GPVar& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_TWORD); } + +//! @brief Create dqword (16 Bytes) pointer operand. +static inline Mem dqword_ptr(const GPVar& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_DQWORD); } + +//! @brief Create mmword (8 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for mmx programming. +static inline Mem mmword_ptr(const GPVar& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_QWORD); } + +//! @brief Create xmmword (16 bytes) pointer operand +//! +//! @note This constructor is provided only for convenience for sse programming. +static inline Mem xmmword_ptr(const GPVar& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, SIZE_DQWORD); } + +//! @brief Create system dependent pointer operand (32-bit or 64-bit). +static inline Mem sysint_ptr(const GPVar& base, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, disp, sizeof(sysint_t)); } + +// ============================================================================ +// [AsmJit::Mem - ptr[base + (index << shift) + displacement]] +// ============================================================================ + +//! @internal +ASMJIT_API Mem _MemPtrBuild(const GPReg& base, const GPReg& index, uint32_t shift, sysint_t disp, uint32_t ptrSize) ASMJIT_NOTHROW; + +//! @internal +ASMJIT_API Mem _MemPtrBuild(const GPVar& base, const GPVar& index, uint32_t shift, sysint_t disp, uint32_t ptrSize) ASMJIT_NOTHROW; + + + +//! @brief Create pointer operand with not specified size. +static inline Mem ptr(const GPReg& base, const GPReg& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, 0); } + +//! @brief Create byte pointer operand. 
+static inline Mem byte_ptr(const GPReg& base, const GPReg& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_BYTE); } + +//! @brief Create word (2 Bytes) pointer operand. +static inline Mem word_ptr(const GPReg& base, const GPReg& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_WORD); } + +//! @brief Create dword (4 Bytes) pointer operand. +static inline Mem dword_ptr(const GPReg& base, const GPReg& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_DWORD); } + +//! @brief Create qword (8 Bytes) pointer operand. +static inline Mem qword_ptr(const GPReg& base, const GPReg& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_QWORD); } + +//! @brief Create tword (10 Bytes) pointer operand (used for 80-bit floating points). +static inline Mem tword_ptr(const GPReg& base, const GPReg& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_TWORD); } + +//! @brief Create dqword (16 Bytes) pointer operand. +static inline Mem dqword_ptr(const GPReg& base, const GPReg& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_DQWORD); } + +//! @brief Create mmword (8 Bytes) pointer operand). +//! +//! @note This constructor is provided only for convenience for mmx programming. +static inline Mem mmword_ptr(const GPReg& base, const GPReg& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_QWORD); } + +//! @brief Create xmmword (16 Bytes) pointer operand. +//! +//! @note This constructor is provided only for convenience for sse programming. 
+static inline Mem xmmword_ptr(const GPReg& base, const GPReg& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_DQWORD); } + +//! @brief Create system dependent pointer operand (32-bit or 64-bit). +static inline Mem sysint_ptr(const GPReg& base, const GPReg& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, sizeof(sysint_t)); } + + + +//! @brief Create pointer operand with not specified size. +static inline Mem ptr(const GPVar& base, const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, 0); } + +//! @brief Create byte pointer operand. +static inline Mem byte_ptr(const GPVar& base, const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_BYTE); } + +//! @brief Create word (2 Bytes) pointer operand. +static inline Mem word_ptr(const GPVar& base, const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_WORD); } + +//! @brief Create dword (4 Bytes) pointer operand. +static inline Mem dword_ptr(const GPVar& base, const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_DWORD); } + +//! @brief Create qword (8 Bytes) pointer operand. +static inline Mem qword_ptr(const GPVar& base, const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_QWORD); } + +//! @brief Create tword (10 Bytes) pointer operand (used for 80-bit floating points). +static inline Mem tword_ptr(const GPVar& base, const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_TWORD); } + +//! @brief Create dqword (16 Bytes) pointer operand. 
+static inline Mem dqword_ptr(const GPVar& base, const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_DQWORD); } + +//! @brief Create mmword (8 Bytes) pointer operand). +//! +//! @note This constructor is provided only for convenience for mmx programming. +static inline Mem mmword_ptr(const GPVar& base, const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_QWORD); } + +//! @brief Create xmmword (16 Bytes) pointer operand. +//! +//! @note This constructor is provided only for convenience for sse programming. +static inline Mem xmmword_ptr(const GPVar& base, const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, SIZE_DQWORD); } + +//! @brief Create system dependent pointer operand (32-bit or 64-bit). +static inline Mem sysint_ptr(const GPVar& base, const GPVar& index, uint32_t shift = 0, sysint_t disp = 0) ASMJIT_NOTHROW +{ return _MemPtrBuild(base, index, shift, disp, sizeof(sysint_t)); } + +// ============================================================================ +// [AsmJit::Macros] +// ============================================================================ + +//! @brief Create Shuffle Constant for MMX/SSE shuffle instrutions. +//! @param z First component position, number at interval [0, 3] inclusive. +//! @param x Second component position, number at interval [0, 3] inclusive. +//! @param y Third component position, number at interval [0, 3] inclusive. +//! @param w Fourth component position, number at interval [0, 3] inclusive. +//! +//! Shuffle constants can be used to make immediate value for these intrinsics: +//! - @c AsmJit::AssemblerIntrinsics::pshufw() +//! - @c AsmJit::AssemblerIntrinsics::pshufd() +//! - @c AsmJit::AssemblerIntrinsics::pshufhw() +//! - @c AsmJit::AssemblerIntrinsics::pshuflw() +//! 
- @c AsmJit::AssemblerIntrinsics::shufps() +static inline uint8_t mm_shuffle(uint8_t z, uint8_t y, uint8_t x, uint8_t w) ASMJIT_NOTHROW +{ return (z << 6) | (y << 4) | (x << 2) | w; } + +//! @} + +} // AsmJit namespace + +// [Guard] +#endif // _ASMJIT_OPERANDX86X64_H diff --git a/lib/AsmJit/Platform.cpp b/lib/AsmJit/Platform.cpp new file mode 100644 index 0000000..b70ddc2 --- /dev/null +++ b/lib/AsmJit/Platform.cpp @@ -0,0 +1,241 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +// [Dependencies] +#include <stdio.h> + +#include "Platform.h" + +// [Api-Begin] +#include "ApiBegin.h" + +// helpers +namespace AsmJit { + +// ============================================================================ +// [AsmJit::Assert] +// ============================================================================ + +void assertionFailure(const char* file, int line, const char* exp) +{ + fprintf(stderr, + "*** ASSERTION FAILURE at %s (line %d)\n" + "*** %s\n", file, line, exp); + + exit(1); +} + +// ============================================================================ +// [AsmJit::Helpers] +// ============================================================================ + +static bool isAligned(sysuint_t base, sysuint_t alignment) +{ + return base % alignment == 0; +} + +static sysuint_t roundUp(sysuint_t base, sysuint_t pageSize) +{ + sysuint_t over = base % pageSize; + return base + (over > 0 ? pageSize - over : 0); +} + +// Implementation is from "Hacker's Delight" by Henry S. Warren, Jr., +// figure 3-3, page 48, where the function is called clp2. +static sysuint_t roundUpToPowerOf2(sysuint_t base) +{ + base -= 1; + + base = base | (base >> 1); + base = base | (base >> 2); + base = base | (base >> 4); + base = base | (base >> 8); + base = base | (base >> 16); + + // I'm trying to make this portable and MSVC strikes me the warning C4293: + // "Shift count negative or too big, undefined behavior" + // Fixing... 
+#if _MSC_VER +# pragma warning(disable: 4293) +#endif // _MSC_VER + + if (sizeof(sysuint_t) >= 8) + base = base | (base >> 32); + + return base + 1; +} + +} // AsmJit namespace + +// ============================================================================ +// [AsmJit::VirtualMemory::Windows] +// ============================================================================ + +#if defined(ASMJIT_WINDOWS) + +#include <windows.h> + +namespace AsmJit { + +struct ASMJIT_HIDDEN VirtualMemoryLocal +{ + VirtualMemoryLocal() ASMJIT_NOTHROW + { + SYSTEM_INFO info; + GetSystemInfo(&info); + + alignment = info.dwAllocationGranularity; + pageSize = roundUpToPowerOf2(info.dwPageSize); + } + + sysuint_t alignment; + sysuint_t pageSize; +}; + +static VirtualMemoryLocal& vm() ASMJIT_NOTHROW +{ + static VirtualMemoryLocal vm; + return vm; +}; + +void* VirtualMemory::alloc(sysuint_t length, sysuint_t* allocated, bool canExecute) + ASMJIT_NOTHROW +{ + return allocProcessMemory(GetCurrentProcess(), length, allocated, canExecute); +} + +void VirtualMemory::free(void* addr, sysuint_t length) + ASMJIT_NOTHROW +{ + return freeProcessMemory(GetCurrentProcess(), addr, length); +} + +void* VirtualMemory::allocProcessMemory(HANDLE hProcess, sysuint_t length, sysuint_t* allocated, bool canExecute) ASMJIT_NOTHROW +{ + // VirtualAlloc rounds allocated size to page size automatically. + sysuint_t msize = roundUp(length, vm().pageSize); + + // Windows XP SP2 / Vista allow Data Excution Prevention (DEP). + WORD protect = canExecute ? 
PAGE_EXECUTE_READWRITE : PAGE_READWRITE; + LPVOID mbase = VirtualAllocEx(hProcess, NULL, msize, MEM_COMMIT | MEM_RESERVE, protect); + if (mbase == NULL) return NULL; + + ASMJIT_ASSERT(isAligned(reinterpret_cast<sysuint_t>(mbase), vm().alignment)); + + if (allocated) *allocated = msize; + return mbase; +} + +void VirtualMemory::freeProcessMemory(HANDLE hProcess, void* addr, sysuint_t /* length */) ASMJIT_NOTHROW +{ + VirtualFreeEx(hProcess, addr, 0, MEM_RELEASE); +} + +sysuint_t VirtualMemory::getAlignment() + ASMJIT_NOTHROW +{ + return vm().alignment; +} + +sysuint_t VirtualMemory::getPageSize() + ASMJIT_NOTHROW +{ + return vm().pageSize; +} + +} // AsmJit + +#endif // ASMJIT_WINDOWS + +// ============================================================================ +// [AsmJit::VirtualMemory::Posix] +// ============================================================================ + +#if defined(ASMJIT_POSIX) + +#include <sys/types.h> +#include <sys/mman.h> +#include <unistd.h> + +// MacOS uses MAP_ANON instead of MAP_ANONYMOUS +#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +namespace AsmJit { + +struct ASMJIT_HIDDEN VirtualMemoryLocal +{ + VirtualMemoryLocal() ASMJIT_NOTHROW + { + alignment = pageSize = getpagesize(); + } + + sysuint_t alignment; + sysuint_t pageSize; +}; + +static VirtualMemoryLocal& vm() + ASMJIT_NOTHROW +{ + static VirtualMemoryLocal vm; + return vm; +} + +void* VirtualMemory::alloc(sysuint_t length, sysuint_t* allocated, bool canExecute) + ASMJIT_NOTHROW +{ + sysuint_t msize = roundUp(length, vm().pageSize); + int protection = PROT_READ | PROT_WRITE | (canExecute ? 
PROT_EXEC : 0); + void* mbase = mmap(NULL, msize, protection, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mbase == MAP_FAILED) return NULL; + if (allocated) *allocated = msize; + return mbase; +} + +void VirtualMemory::free(void* addr, sysuint_t length) + ASMJIT_NOTHROW +{ + munmap(addr, length); +} + +sysuint_t VirtualMemory::getAlignment() + ASMJIT_NOTHROW +{ + return vm().alignment; +} + +sysuint_t VirtualMemory::getPageSize() + ASMJIT_NOTHROW +{ + return vm().pageSize; +} + +} // AsmJit + +#endif // ASMJIT_POSIX + +// [Api-End] +#include "ApiEnd.h" diff --git a/lib/AsmJit/Platform.h b/lib/AsmJit/Platform.h new file mode 100644 index 0000000..3665238 --- /dev/null +++ b/lib/AsmJit/Platform.h @@ -0,0 +1,227 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +// [Guard] +#ifndef _ASMJIT_PLATFORM_H +#define _ASMJIT_PLATFORM_H + +// [Dependencies] +#include "Build.h" + +#if defined(ASMJIT_WINDOWS) +#include <windows.h> +#endif // ASMJIT_WINDOWS + +#if defined(ASMJIT_POSIX) +#include <pthread.h> +#endif // ASMJIT_POSIX + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +//! @addtogroup AsmJit_Util +//! @{ + +// ============================================================================ +// [AsmJit::Assert] +// ============================================================================ + +//! @brief Called in debug build on assertion failure. +//! @param file Source file name where it happened. +//! @param line Line in the source file. +//! @param exp Expression what failed. +//! +//! If you have problems with assertions simply put a breakpoint into +//! AsmJit::assertionFailure() method (see AsmJit/Platform.cpp file) and see +//! call stack. +ASMJIT_API void assertionFailure(const char* file, int line, const char* exp); + +// ============================================================================ +// [AsmJit::Lock] +// ============================================================================ + +//! @brief Lock - used in thread-safe code for locking. +struct ASMJIT_HIDDEN Lock +{ +#if defined(ASMJIT_WINDOWS) + typedef CRITICAL_SECTION Handle; +#endif // ASMJIT_WINDOWS +#if defined(ASMJIT_POSIX) + typedef pthread_mutex_t Handle; +#endif // ASMJIT_POSIX + + //! @brief Create a new @ref Lock instance. + inline Lock() ASMJIT_NOTHROW + { +#if defined(ASMJIT_WINDOWS) + InitializeCriticalSection(&_handle); + // InitializeLockAndSpinCount(&_handle, 2000); +#endif // ASMJIT_WINDOWS +#if defined(ASMJIT_POSIX) + pthread_mutex_init(&_handle, NULL); +#endif // ASMJIT_POSIX + } + + //! @brief Destroy the @ref Lock instance. 
+ inline ~Lock() ASMJIT_NOTHROW + { +#if defined(ASMJIT_WINDOWS) + DeleteCriticalSection(&_handle); +#endif // ASMJIT_WINDOWS +#if defined(ASMJIT_POSIX) + pthread_mutex_destroy(&_handle); +#endif // ASMJIT_POSIX + } + + //! @brief Get handle. + inline Handle& getHandle() ASMJIT_NOTHROW + { + return _handle; + } + + //! @overload + inline const Handle& getHandle() const ASMJIT_NOTHROW + { + return _handle; + } + + //! @brief Lock. + inline void lock() ASMJIT_NOTHROW + { +#if defined(ASMJIT_WINDOWS) + EnterCriticalSection(&_handle); +#endif // ASMJIT_WINDOWS +#if defined(ASMJIT_POSIX) + pthread_mutex_lock(&_handle); +#endif // ASMJIT_POSIX + } + + //! @brief Unlock. + inline void unlock() ASMJIT_NOTHROW + { +#if defined(ASMJIT_WINDOWS) + LeaveCriticalSection(&_handle); +#endif // ASMJIT_WINDOWS +#if defined(ASMJIT_POSIX) + pthread_mutex_unlock(&_handle); +#endif // ASMJIT_POSIX + } + +private: + //! @brief Handle. + Handle _handle; + + // Disable copy. + ASMJIT_DISABLE_COPY(Lock) +}; + +// ============================================================================ +// [AsmJit::AutoLock] +// ============================================================================ + +//! @brief Scope auto locker. +struct ASMJIT_HIDDEN AutoLock +{ + //! @brief Locks @a target. + inline AutoLock(Lock& target) ASMJIT_NOTHROW : _target(target) + { + _target.lock(); + } + + //! @brief Unlocks target. + inline ~AutoLock() ASMJIT_NOTHROW + { + _target.unlock(); + } + +private: + //! @brief Pointer to target (lock). + Lock& _target; + + // Disable copy. + ASMJIT_DISABLE_COPY(AutoLock) +}; + +// ============================================================================ +// [AsmJit::VirtualMemory] +// ============================================================================ + +//! @brief Class that helps with allocating memory for executing code +//! generated by JIT compiler. +//! +//! There are defined functions that provides facility to allocate and free +//! 
memory where can be executed code. If processor and operating system +//! supports execution protection then you can't run code from normally +//! malloc()'ed memory. +//! +//! Functions are internally implemented by operating system dependent way. +//! VirtualAlloc() function is used for Windows operating system and mmap() +//! for posix ones. If you want to study or create your own functions, look +//! at VirtualAlloc() or mmap() documentation (depends on you target OS). +//! +//! Under posix operating systems is also useable mprotect() function, that +//! can enable execution protection to malloc()'ed memory block. +struct ASMJIT_API VirtualMemory +{ + //! @brief Allocate virtual memory. + //! + //! Pages are readable/writeable, but they are not guaranteed to be + //! executable unless 'canExecute' is true. Returns the address of + //! allocated memory, or NULL if failed. + static void* alloc(sysuint_t length, sysuint_t* allocated, bool canExecute) ASMJIT_NOTHROW; + + //! @brief Free memory allocated by @c alloc() + static void free(void* addr, sysuint_t length) ASMJIT_NOTHROW; + +#if defined(ASMJIT_WINDOWS) + //! @brief Allocate virtual memory of @a hProcess. + //! + //! @note This function is windows specific and unportable. + static void* allocProcessMemory(HANDLE hProcess, sysuint_t length, sysuint_t* allocated, bool canExecute) ASMJIT_NOTHROW; + + //! @brief Free virtual memory of @a hProcess. + //! + //! @note This function is windows specific and unportable. + static void freeProcessMemory(HANDLE hProcess, void* addr, sysuint_t length) ASMJIT_NOTHROW; +#endif // ASMJIT_WINDOWS + + //! @brief Get the alignment guaranteed by alloc(). + static sysuint_t getAlignment() ASMJIT_NOTHROW; + + //! @brief Get size of single page. + static sysuint_t getPageSize() ASMJIT_NOTHROW; +}; + +//! 
@} + +} // AsmJit namespace + +// [Api-End] +#include "ApiEnd.h" + +// [Guard] +#endif // _ASMJIT_PLATFORM_H diff --git a/lib/AsmJit/Util.cpp b/lib/AsmJit/Util.cpp new file mode 100644 index 0000000..4d09e80 --- /dev/null +++ b/lib/AsmJit/Util.cpp @@ -0,0 +1,285 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
+ +// [Dependencies] +#include "Build.h" +#include "Util_p.h" + +// [Api-Begin] +#include "ApiBegin.h" + +namespace AsmJit { + +// ============================================================================ +// [AsmJit::Util] +// ============================================================================ + +static const char letters[] = "0123456789ABCDEF"; + +char* Util::mycpy(char* dst, const char* src, sysuint_t len) ASMJIT_NOTHROW +{ + if (src == NULL) return dst; + + if (len == (sysuint_t)-1) + { + while (*src) *dst++ = *src++; + } + else + { + memcpy(dst, src, len); + dst += len; + } + + return dst; +} + +char* Util::myfill(char* dst, const int c, sysuint_t len) ASMJIT_NOTHROW +{ + memset(dst, c, len); + return dst + len; +} + +char* Util::myhex(char* dst, const uint8_t* src, sysuint_t len) ASMJIT_NOTHROW +{ + for (sysuint_t i = len; i; i--, dst += 2, src += 1) + { + dst[0] = letters[(src[0] >> 4) & 0xF]; + dst[1] = letters[(src[0] ) & 0xF]; + } + + return dst; +} + +// Not too efficient, but this is mainly for debugging:) +char* Util::myutoa(char* dst, sysuint_t i, sysuint_t base) ASMJIT_NOTHROW +{ + ASMJIT_ASSERT(base <= 16); + + char buf[128]; + char* p = buf + 128; + + do { + sysint_t b = i % base; + *--p = letters[b]; + i /= base; + } while (i); + + return Util::mycpy(dst, p, (sysuint_t)(buf + 128 - p)); +} + +char* Util::myitoa(char* dst, sysint_t i, sysuint_t base) ASMJIT_NOTHROW +{ + if (i < 0) + { + *dst++ = '-'; + i = -i; + } + + return Util::myutoa(dst, (sysuint_t)i, base); +} + +// ============================================================================ +// [AsmJit::Buffer] +// ============================================================================ + +void Buffer::emitData(const void* dataPtr, sysuint_t dataLen) ASMJIT_NOTHROW +{ + sysint_t max = getCapacity() - getOffset(); + if ((sysuint_t)max < dataLen) + { + if (!realloc(getOffset() + dataLen)) return; + } + + memcpy(_cur, dataPtr, dataLen); + _cur += dataLen; +} + +bool 
Buffer::realloc(sysint_t to) ASMJIT_NOTHROW +{ + if (getCapacity() < to) + { + sysint_t len = getOffset(); + + uint8_t *newdata; + if (_data) + newdata = (uint8_t*)ASMJIT_REALLOC(_data, to); + else + newdata = (uint8_t*)ASMJIT_MALLOC(to); + if (!newdata) return false; + + _data = newdata; + _cur = newdata + len; + _max = newdata + to; + _max -= (to >= _growThreshold) ? _growThreshold : to; + + _capacity = to; + } + + return true; +} + +bool Buffer::grow() ASMJIT_NOTHROW +{ + sysint_t to = _capacity; + + if (to < 512) + to = 1024; + else if (to > 65536) + to += 65536; + else + to <<= 1; + + return realloc(to); +} + +void Buffer::clear() ASMJIT_NOTHROW +{ + _cur = _data; +} + +void Buffer::free() ASMJIT_NOTHROW +{ + if (!_data) return; + ASMJIT_FREE(_data); + + _data = NULL; + _cur = NULL; + _max = NULL; + _capacity = 0; +} + +uint8_t* Buffer::take() ASMJIT_NOTHROW +{ + uint8_t* data = _data; + + _data = NULL; + _cur = NULL; + _max = NULL; + _capacity = 0; + + return data; +} + +// ============================================================================ +// [AsmJit::Zone] +// ============================================================================ + +Zone::Zone(sysuint_t chunkSize) ASMJIT_NOTHROW +{ + _chunks = NULL; + _total = 0; + _chunkSize = chunkSize; +} + +Zone::~Zone() ASMJIT_NOTHROW +{ + freeAll(); +} + +void* Zone::zalloc(sysuint_t size) ASMJIT_NOTHROW +{ + // Align to 4 or 8 bytes. 
+ size = (size + sizeof(sysint_t)-1) & ~(sizeof(sysint_t)-1); + + Chunk* cur = _chunks; + + if (!cur || cur->getRemainingBytes() < size) + { + sysuint_t chSize = _chunkSize; + if (chSize < size) chSize = size; + + cur = (Chunk*)ASMJIT_MALLOC(sizeof(Chunk) - sizeof(void*) + chSize); + if (!cur) return NULL; + + cur->prev = _chunks; + cur->pos = 0; + cur->size = _chunkSize; + _chunks = cur; + } + + uint8_t* p = cur->data + cur->pos; + cur->pos += size; + _total += size; + return (void*)p; +} + +char* Zone::zstrdup(const char* str) ASMJIT_NOTHROW +{ + if (str == NULL) return NULL; + + sysuint_t len = strlen(str); + if (len == 0) return NULL; + + // Include NULL terminator. + len++; + + // Limit string length. + if (len > 256) len = 256; + + char* m = reinterpret_cast<char*>(zalloc((len + 15) & ~15)); + if (!m) return NULL; + + memcpy(m, str, len); + m[len-1] = '\0'; + return m; +} + +void Zone::clear() ASMJIT_NOTHROW +{ + Chunk* cur = _chunks; + if (!cur) return; + + _chunks->pos = 0; + _chunks->prev = NULL; + _total = 0; + + cur = cur->prev; + while (cur) + { + Chunk* prev = cur->prev; + ASMJIT_FREE(cur); + cur = prev; + } +} + +void Zone::freeAll() ASMJIT_NOTHROW +{ + Chunk* cur = _chunks; + + _chunks = NULL; + _total = 0; + + while (cur) + { + Chunk* prev = cur->prev; + ASMJIT_FREE(cur); + cur = prev; + } +} + +} // AsmJit namespace + +// [Api-End] +#include "ApiEnd.h" diff --git a/lib/AsmJit/Util.h b/lib/AsmJit/Util.h new file mode 100644 index 0000000..da824a6 --- /dev/null +++ b/lib/AsmJit/Util.h @@ -0,0 +1,933 @@ +// AsmJit - Complete JIT Assembler for C++ Language. 
+ +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Guard] +#ifndef _ASMJIT_UTIL_H +#define _ASMJIT_UTIL_H + +// [Dependencies] +#include "Build.h" + +#include <stdlib.h> +#include <string.h> + +namespace AsmJit { + +//! @addtogroup AsmJit_Util +//! @{ + +// ============================================================================ +// [AsmJit::Macros] +// ============================================================================ + +// Skip documenting this. 
#if !defined(ASMJIT_NODOC)
struct ASMJIT_HIDDEN _DontInitialize {};
struct ASMJIT_HIDDEN _Initialize {};
#endif // !ASMJIT_NODOC

// ============================================================================
// [AsmJit::Util]
// ============================================================================

namespace Util {

// ============================================================================
// [AsmJit::Util::isInt?]
// ============================================================================

//! @brief Returns @c true if a given integer @a x is signed 8-bit integer
static inline bool isInt8(sysint_t x) ASMJIT_NOTHROW { return x >= -128 && x <= 127; }
//! @brief Returns @c true if a given integer @a x is unsigned 8-bit integer
static inline bool isUInt8(sysint_t x) ASMJIT_NOTHROW { return x >= 0 && x <= 255; }

//! @brief Returns @c true if a given integer @a x is signed 16-bit integer
static inline bool isInt16(sysint_t x) ASMJIT_NOTHROW { return x >= -32768 && x <= 32767; }
//! @brief Returns @c true if a given integer @a x is unsigned 16-bit integer
static inline bool isUInt16(sysint_t x) ASMJIT_NOTHROW { return x >= 0 && x <= 65535; }

//! @brief Returns @c true if a given integer @a x is signed 32-bit integer
//! (trivially true on 32-bit targets, where sysint_t itself is 32-bit).
static inline bool isInt32(sysint_t x) ASMJIT_NOTHROW
{
#if defined(ASMJIT_X86)
  return true;
#else
  return x >= ASMJIT_INT64_C(-2147483648) && x <= ASMJIT_INT64_C(2147483647);
#endif
}
//! @brief Returns @c true if a given integer @a x is unsigned 32-bit integer
static inline bool isUInt32(sysint_t x) ASMJIT_NOTHROW
{
#if defined(ASMJIT_X86)
  return x >= 0;
#else
  return x >= 0 && x <= ASMJIT_INT64_C(4294967295);
#endif
}

// ============================================================================
// [Bit Utils]
// ============================================================================

//! @brief Returns a mask with only bit @a x set.
//! NOTE(review): shifting by x >= 32 is undefined behavior; presumably
//! callers always pass a valid bit index -- confirm against call sites.
static inline uint32_t maskFromIndex(uint32_t x)
{
  return (1U << x);
}

//! @brief Returns a mask with all bits below index @a x set; any @a x >= 32
//! yields all ones.
static inline uint32_t maskUpToIndex(uint32_t x)
{
  if (x >= 32)
    return 0xFFFFFFFF;
  else
    return (1U << x) - 1;
}

// From http://graphics.stanford.edu/~seander/bithacks.html .
//! @brief Returns the number of set bits in @a x (population count).
static inline uint32_t bitCount(uint32_t x)
{
  x = x - ((x >> 1) & 0x55555555);
  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
  // Precedence note: '+' binds tighter than '&', so the expression below
  // reads ((x + (x >> 4)) & 0xF0F0F0F) -- the intended bithacks form.
  return ((x + (x >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
}

//! @brief Returns the index of the lowest set bit in @a mask, or 0xFFFFFFFF
//! (INVALID_VALUE) when no bit is set.
static inline uint32_t findFirstBit(uint32_t mask) ASMJIT_NOTHROW
{
  for (uint32_t i = 0, bit = 1; i < sizeof(uint32_t) * 8; i++, bit <<= 1)
  {
    if (mask & bit) return i;
  }

  // INVALID_VALUE.
  return 0xFFFFFFFF;
}

// ============================================================================
// [Alignment]
// ============================================================================

// Align variable @a x to 16-bytes (round up to the next multiple of 16).
template<typename T>
static inline T alignTo16(const T& x)
{
  return (x + (T)15) & (T)~15;
}

// Return the size needed to align variable @a x to 16-bytes.
template<typename T>
static inline T deltaTo16(const T& x)
{
  T aligned = alignTo16(x);
  return aligned - x;
}

} // Util namespace

// ============================================================================
// [AsmJit::function_cast<>]
// ============================================================================

//! @brief Cast used to cast pointer to function. It's like reinterpret_cast<>,
//! but uses internally C style cast to work with MinGW.
//!
If you are using single compiler and @c reinterpret_cast<> works for you, +//! there is no reason to use @c AsmJit::function_cast<>. If you are writing +//! cross-platform software with various compiler support, consider using +//! @c AsmJit::function_cast<> instead of @c reinterpret_cast<>. +template<typename T, typename Z> +static inline T function_cast(Z* p) ASMJIT_NOTHROW { return (T)p; } + +// ============================================================================ +// [AsmJit::(X)MMData] +// ============================================================================ + +//! @brief Structure used for MMX specific data (64-bit). +//! +//! This structure can be used to load / store data from / to MMX register. +union ASMJIT_HIDDEN MMData +{ + // -------------------------------------------------------------------------- + // [Methods] + // -------------------------------------------------------------------------- + + //! @brief Set all eight signed 8-bit integers. + inline void setSB( + int8_t x0, int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6, int8_t x7) ASMJIT_NOTHROW + { + sb[0] = x0; sb[1] = x1; sb[2] = x2; sb[3] = x3; sb[4] = x4; sb[5] = x5; sb[6] = x6; sb[7] = x7; + } + + //! @brief Set all eight unsigned 8-bit integers. + inline void setUB( + uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7) ASMJIT_NOTHROW + { + ub[0] = x0; ub[1] = x1; ub[2] = x2; ub[3] = x3; ub[4] = x4; ub[5] = x5; ub[6] = x6; ub[7] = x7; + } + + //! @brief Set all four signed 16-bit integers. + inline void setSW( + int16_t x0, int16_t x1, int16_t x2, int16_t x3) ASMJIT_NOTHROW + { + sw[0] = x0; sw[1] = x1; sw[2] = x2; sw[3] = x3; + } + + //! @brief Set all four unsigned 16-bit integers. + inline void setUW( + uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3) ASMJIT_NOTHROW + { + uw[0] = x0; uw[1] = x1; uw[2] = x2; uw[3] = x3; + } + + //! @brief Set all two signed 32-bit integers. 
+ inline void setSD( + int32_t x0, int32_t x1) ASMJIT_NOTHROW + { + sd[0] = x0; sd[1] = x1; + } + + //! @brief Set all two unsigned 32-bit integers. + inline void setUD( + uint32_t x0, uint32_t x1) ASMJIT_NOTHROW + { + ud[0] = x0; ud[1] = x1; + } + + //! @brief Set signed 64-bit integer. + inline void setSQ( + int64_t x0) ASMJIT_NOTHROW + { + sq[0] = x0; + } + + //! @brief Set unsigned 64-bit integer. + inline void setUQ( + uint64_t x0) ASMJIT_NOTHROW + { + uq[0] = x0; + } + + //! @brief Set all two SP-FP values. + inline void setSF( + float x0, float x1) ASMJIT_NOTHROW + { + sf[0] = x0; sf[1] = x1; + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Array of eight signed 8-bit integers. + int8_t sb[8]; + //! @brief Array of eight unsigned 8-bit integers. + uint8_t ub[8]; + //! @brief Array of four signed 16-bit integers. + int16_t sw[4]; + //! @brief Array of four unsigned 16-bit integers. + uint16_t uw[4]; + //! @brief Array of two signed 32-bit integers. + int32_t sd[2]; + //! @brief Array of two unsigned 32-bit integers. + uint32_t ud[2]; + //! @brief Array of one signed 64-bit integer. + int64_t sq[1]; + //! @brief Array of one unsigned 64-bit integer. + uint64_t uq[1]; + + //! @brief Array of two SP-FP values. + float sf[2]; +}; + +//! @brief Structure used for SSE specific data (128-bit). +//! +//! This structure can be used to load / store data from / to SSE register. +//! +//! @note Always align SSE data to 16-bytes. +union ASMJIT_HIDDEN XMMData +{ + // -------------------------------------------------------------------------- + // [Methods] + // -------------------------------------------------------------------------- + + //! @brief Set all sixteen signed 8-bit integers. 
+ inline void setSB( + int8_t x0, int8_t x1, int8_t x2 , int8_t x3 , int8_t x4 , int8_t x5 , int8_t x6 , int8_t x7 , + int8_t x8, int8_t x9, int8_t x10, int8_t x11, int8_t x12, int8_t x13, int8_t x14, int8_t x15) ASMJIT_NOTHROW + { + sb[0] = x0; sb[1] = x1; sb[ 2] = x2 ; sb[3 ] = x3 ; sb[4 ] = x4 ; sb[5 ] = x5 ; sb[6 ] = x6 ; sb[7 ] = x7 ; + sb[8] = x8; sb[9] = x9; sb[10] = x10; sb[11] = x11; sb[12] = x12; sb[13] = x13; sb[14] = x14; sb[15] = x15; + } + + //! @brief Set all sixteen unsigned 8-bit integers. + inline void setUB( + uint8_t x0, uint8_t x1, uint8_t x2 , uint8_t x3 , uint8_t x4 , uint8_t x5 , uint8_t x6 , uint8_t x7 , + uint8_t x8, uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15) ASMJIT_NOTHROW + { + ub[0] = x0; ub[1] = x1; ub[ 2] = x2 ; ub[3 ] = x3 ; ub[4 ] = x4 ; ub[5 ] = x5 ; ub[6 ] = x6 ; ub[7 ] = x7 ; + ub[8] = x8; ub[9] = x9; ub[10] = x10; ub[11] = x11; ub[12] = x12; ub[13] = x13; ub[14] = x14; ub[15] = x15; + } + + //! @brief Set all eight signed 16-bit integers. + inline void setSW( + int16_t x0, int16_t x1, int16_t x2, int16_t x3, int16_t x4, int16_t x5, int16_t x6, int16_t x7) ASMJIT_NOTHROW + { + sw[0] = x0; sw[1] = x1; sw[2] = x2; sw[3] = x3; sw[4] = x4; sw[5] = x5; sw[6] = x6; sw[7] = x7; + } + + //! @brief Set all eight unsigned 16-bit integers. + inline void setUW( + uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) ASMJIT_NOTHROW + { + uw[0] = x0; uw[1] = x1; uw[2] = x2; uw[3] = x3; uw[4] = x4; uw[5] = x5; uw[6] = x6; uw[7] = x7; + } + + //! @brief Set all four signed 32-bit integers. + inline void setSD( + int32_t x0, int32_t x1, int32_t x2, int32_t x3) ASMJIT_NOTHROW + { + sd[0] = x0; sd[1] = x1; sd[2] = x2; sd[3] = x3; + } + + //! @brief Set all four unsigned 32-bit integers. + inline void setUD( + uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) ASMJIT_NOTHROW + { + ud[0] = x0; ud[1] = x1; ud[2] = x2; ud[3] = x3; + } + + //! 
@brief Set all two signed 64-bit integers. + inline void setSQ( + int64_t x0, int64_t x1) ASMJIT_NOTHROW + { + sq[0] = x0; sq[1] = x1; + } + + //! @brief Set all two unsigned 64-bit integers. + inline void setUQ( + uint64_t x0, uint64_t x1) ASMJIT_NOTHROW + { + uq[0] = x0; uq[1] = x1; + } + + //! @brief Set all four SP-FP floats. + inline void setSF( + float x0, float x1, float x2, float x3) ASMJIT_NOTHROW + { + sf[0] = x0; sf[1] = x1; sf[2] = x2; sf[3] = x3; + } + + //! @brief Set all two DP-FP floats. + inline void setDF( + double x0, double x1) ASMJIT_NOTHROW + { + df[0] = x0; df[1] = x1; + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Array of sixteen signed 8-bit integers. + int8_t sb[16]; + //! @brief Array of sixteen unsigned 8-bit integers. + uint8_t ub[16]; + //! @brief Array of eight signed 16-bit integers. + int16_t sw[8]; + //! @brief Array of eight unsigned 16-bit integers. + uint16_t uw[8]; + //! @brief Array of four signed 32-bit integers. + int32_t sd[4]; + //! @brief Array of four unsigned 32-bit integers. + uint32_t ud[4]; + //! @brief Array of two signed 64-bit integers. + int64_t sq[2]; + //! @brief Array of two unsigned 64-bit integers. + uint64_t uq[2]; + + //! @brief Array of four 32-bit single precision floating points. + float sf[4]; + //! @brief Array of two 64-bit double precision floating points. + double df[2]; +}; + +// ============================================================================ +// [AsmJit::Buffer] +// ============================================================================ + +//! @brief Buffer used to store instruction stream in AsmJit. +//! +//! This class can be dangerous, if you don't know how it works. Assembler +//! instruction stream is usually constructed by multiple calls of emit +//! functions that emits bytes, words, dwords or qwords. But to decrease +//! 
AsmJit library size and improve performance, we are not checking for +//! buffer overflow for each emit operation, but only once in highler level +//! emit instruction. +//! +//! So, if you want to use this class, you need to do buffer checking yourself +//! by using @c ensureSpace() method. It's designed to grow buffer if needed. +//! Threshold for growing is named @c growThreshold() and it means count of +//! bytes for emitting single operation. Default size is set to 16 bytes, +//! because x86 and x64 instruction can't be larger (so it's space to hold 1 +//! instruction). +//! +//! Example using Buffer: +//! +//! @code +//! // Buffer instance, growThreshold == 16 +//! // (no memory allocated in constructor). +//! AsmJit::Buffer buf(16); +//! +//! // Begin of emit stream, ensure space can fail on out of memory error. +//! if (buf.ensureSpace()) +//! { +//! // here, you can emit up to 16 (growThreshold) bytes +//! buf.emitByte(0x00); +//! buf.emitByte(0x01); +//! buf.emitByte(0x02); +//! buf.emitByte(0x03); +//! ... +//! } +//! @endcode +struct ASMJIT_API Buffer +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + inline Buffer(sysint_t growThreshold = 16) ASMJIT_NOTHROW : + _data(NULL), + _cur(NULL), + _max(NULL), + _capacity(0), + _growThreshold(growThreshold) + { + } + + inline ~Buffer() ASMJIT_NOTHROW + { + if (_data) ASMJIT_FREE(_data); + } + + //! @brief Get start of buffer. + inline uint8_t* getData() const ASMJIT_NOTHROW { return _data; } + + //! @brief Get current pointer in code buffer. + inline uint8_t* getCur() const ASMJIT_NOTHROW { return _cur; } + + //! @brief Get maximum pointer in code buffer for growing. + inline uint8_t* getMax() const ASMJIT_NOTHROW { return _max; } + + //! @brief Get current offset in buffer. 
  inline sysint_t getOffset() const ASMJIT_NOTHROW { return (sysint_t)(_cur - _data); }

  //! @brief Get capacity of buffer.
  inline sysint_t getCapacity() const ASMJIT_NOTHROW { return _capacity; }

  //! @brief Get grow threshold.
  inline sysint_t getGrowThreshold() const ASMJIT_NOTHROW { return _growThreshold; }

  //! @brief Ensure space for next instruction (at least growThreshold bytes).
  inline bool ensureSpace() ASMJIT_NOTHROW { return (_cur >= _max) ? grow() : true; }

  //! @brief Sets offset to @a o and returns previous offset.
  //!
  //! This method can be used to truncate buffer or it's used to
  //! overwrite specific position in buffer by Assembler.
  inline sysint_t toOffset(sysint_t o) ASMJIT_NOTHROW
  {
    ASMJIT_ASSERT(o < _capacity);

    sysint_t prev = (sysint_t)(_cur - _data);
    _cur = _data + o;
    return prev;
  }

  //! @brief Reallocate buffer.
  //!
  //! It's only used for growing, buffer is never reallocated to smaller
  //! number than current capacity() is.
  bool realloc(sysint_t to) ASMJIT_NOTHROW;

  //! @brief Used to grow the buffer.
  //!
  //! It will typically realloc to twice size of capacity(), but if capacity()
  //! is large, it will use smaller steps.
  bool grow() ASMJIT_NOTHROW;

  //! @brief Clear everything, but not deallocate buffer.
  void clear() ASMJIT_NOTHROW;

  //! @brief Free buffer and NULL all pointers.
  void free() ASMJIT_NOTHROW;

  //! @brief Take ownership of the buffer data and purge @c Buffer instance.
  uint8_t* take() ASMJIT_NOTHROW;

  // --------------------------------------------------------------------------
  // [Emit]
  // --------------------------------------------------------------------------

  // NOTE: the emit* methods below perform no bounds checking by design (see
  // the class description) -- callers must have called ensureSpace() first.

  //! @brief Emit Byte.
  inline void emitByte(uint8_t x) ASMJIT_NOTHROW
  {
    *_cur++ = x;
  }

  //! @brief Emit Word (2 bytes).
  inline void emitWord(uint16_t x) ASMJIT_NOTHROW
  {
    *(uint16_t *)_cur = x;
    _cur += 2;
  }

  //! @brief Emit DWord (4 bytes).
  inline void emitDWord(uint32_t x) ASMJIT_NOTHROW
  {
    *(uint32_t *)_cur = x;
    _cur += 4;
  }

  //! @brief Emit QWord (8 bytes).
  inline void emitQWord(uint64_t x) ASMJIT_NOTHROW
  {
    *(uint64_t *)_cur = x;
    _cur += 8;
  }

  //! @brief Emit system signed integer (4 or 8 bytes).
  inline void emitSysInt(sysint_t x) ASMJIT_NOTHROW
  {
    *(sysint_t *)_cur = x;
    _cur += sizeof(sysint_t);
  }

  //! @brief Emit system unsigned integer (4 or 8 bytes).
  inline void emitSysUInt(sysuint_t x) ASMJIT_NOTHROW
  {
    *(sysuint_t *)_cur = x;
    _cur += sizeof(sysuint_t);
  }

  //! @brief Emit custom data.
  void emitData(const void* ptr, sysuint_t len) ASMJIT_NOTHROW;

  // --------------------------------------------------------------------------
  // [Get / Set]
  // --------------------------------------------------------------------------

  //! @brief Get byte at position @a pos.
  inline uint8_t getByteAt(sysint_t pos) const ASMJIT_NOTHROW
  {
    return *reinterpret_cast<const uint8_t*>(_data + pos);
  }

  //! @brief Get word at position @a pos.
  inline uint16_t getWordAt(sysint_t pos) const ASMJIT_NOTHROW
  {
    return *reinterpret_cast<const uint16_t*>(_data + pos);
  }

  //! @brief Get dword at position @a pos.
  inline uint32_t getDWordAt(sysint_t pos) const ASMJIT_NOTHROW
  {
    return *reinterpret_cast<const uint32_t*>(_data + pos);
  }

  //! @brief Get qword at position @a pos.
  inline uint64_t getQWordAt(sysint_t pos) const ASMJIT_NOTHROW
  {
    return *reinterpret_cast<const uint64_t*>(_data + pos);
  }

  //! @brief Set byte at position @a pos.
  inline void setByteAt(sysint_t pos, uint8_t x) ASMJIT_NOTHROW
  {
    *reinterpret_cast<uint8_t*>(_data + pos) = x;
  }

  //! @brief Set word at position @a pos.
  inline void setWordAt(sysint_t pos, uint16_t x) ASMJIT_NOTHROW
  {
    *reinterpret_cast<uint16_t*>(_data + pos) = x;
  }

  //! @brief Set dword at position @a pos.
+ inline void setDWordAt(sysint_t pos, uint32_t x) ASMJIT_NOTHROW + { + *reinterpret_cast<uint32_t*>(_data + pos) = x; + } + + //! @brief Set word at position @a pos. + inline void setQWordAt(sysint_t pos, uint64_t x) ASMJIT_NOTHROW + { + *reinterpret_cast<uint64_t*>(_data + pos) = x; + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + // All members are public, because they can be accessed and modified by + // Assembler/Compiler directly. + + //! @brief Beginning position of buffer. + uint8_t* _data; + //! @brief Current position in buffer. + uint8_t* _cur; + //! @brief Maximum position in buffer for realloc. + uint8_t* _max; + + //! @brief Buffer capacity (in bytes). + sysint_t _capacity; + + //! @brief Grow threshold + sysint_t _growThreshold; +}; + +// ============================================================================ +// [AsmJit::PodVector<>] +// ============================================================================ + +//! @brief Template used to store and manage array of POD data. +//! +//! This template has these adventages over other vector<> templates: +//! - Non-copyable (designed to be non-copyable, we want it) +//! - No copy-on-write (some implementations of stl can use it) +//! - Optimized for working only with POD types +//! - Uses ASMJIT_... memory management macros +template <typename T> +struct PodVector +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create new instance of PodVector template. Data will not + //! be allocated (will be NULL). + inline PodVector() ASMJIT_NOTHROW : _data(NULL), _length(0), _capacity(0) + { + } + + //! @brief Destroy PodVector and free all data. 
+ inline ~PodVector() ASMJIT_NOTHROW + { + if (_data) ASMJIT_FREE(_data); + } + + // -------------------------------------------------------------------------- + // [Data] + // -------------------------------------------------------------------------- + + //! @brief Get vector data. + inline T* getData() ASMJIT_NOTHROW { return _data; } + //! @overload + inline const T* getData() const ASMJIT_NOTHROW { return _data; } + //! @brief Get vector length. + inline sysuint_t getLength() const ASMJIT_NOTHROW { return _length; } + //! @brief get vector capacity (allocation capacity). + inline sysuint_t getCapacity() const ASMJIT_NOTHROW { return _capacity; } + + // -------------------------------------------------------------------------- + // [Manipulation] + // -------------------------------------------------------------------------- + + //! @brief Clear vector data, but not free internal buffer. + void clear() ASMJIT_NOTHROW + { + _length = 0; + } + + //! @brief Clear vector data and free internal buffer. + void free() ASMJIT_NOTHROW + { + if (_data) + { + ASMJIT_FREE(_data); + _data = 0; + _length = 0; + _capacity = 0; + } + } + + //! @brief Prepend @a item to vector. + bool prepend(const T& item) ASMJIT_NOTHROW + { + if (_length == _capacity && !_grow()) return false; + + memmove(_data + 1, _data, sizeof(T) * _length); + memcpy(_data, &item, sizeof(T)); + + _length++; + return true; + } + + //! @brief Insert an @a item at the @a index. + bool insert(sysuint_t index, const T& item) ASMJIT_NOTHROW + { + ASMJIT_ASSERT(index <= _length); + if (_length == _capacity && !_grow()) return false; + + T* dst = _data + index; + memmove(dst + 1, dst, _length - index); + memcpy(dst, &item, sizeof(T)); + + _length++; + return true; + } + + //! @brief Append @a item to vector. + bool append(const T& item) ASMJIT_NOTHROW + { + if (_length == _capacity && !_grow()) return false; + + memcpy(_data + _length, &item, sizeof(T)); + + _length++; + return true; + } + + //! 
@brief Get index of @a val or (sysuint_t)-1 if not found. + sysuint_t indexOf(const T& val) const ASMJIT_NOTHROW + { + sysuint_t i = 0, len = _length; + for (i = 0; i < len; i++) { if (_data[i] == val) return i; } + return (sysuint_t)-1; + } + + //! @brief Remove element at index @a i. + void removeAt(sysuint_t i) ASMJIT_NOTHROW + { + ASMJIT_ASSERT(i < _length); + + T* dst = _data + i; + _length--; + memmove(dst, dst + 1, _length - i); + } + + //! @brief Swap this pod-vector with @a other. + void swap(PodVector<T>& other) ASMJIT_NOTHROW + { + T* _tmp_data = _data; + sysuint_t _tmp_length = _length; + sysuint_t _tmp_capacity = _capacity; + + _data = other._data; + _length = other._length; + _capacity = other._capacity; + + other._data = _tmp_data; + other._length = _tmp_length; + other._capacity = _tmp_capacity; + } + + //! @brief Get item at position @a i. + inline T& operator[](sysuint_t i) ASMJIT_NOTHROW + { + ASMJIT_ASSERT(i < _length); + return _data[i]; + } + //! @brief Get item at position @a i. + inline const T& operator[](sysuint_t i) const ASMJIT_NOTHROW + { + ASMJIT_ASSERT(i < _length); + return _data[i]; + } + + //! @brief Append the item and return address so it can be initialized. + T* newItem() ASMJIT_NOTHROW + { + if (_length == _capacity && !_grow()) return NULL; + return _data + (_length++); + } + + // -------------------------------------------------------------------------- + // [Private] + // -------------------------------------------------------------------------- + +private: + //! @brief Called to grow internal array. + bool _grow() ASMJIT_NOTHROW + { + return _realloc(_capacity < 16 ? 16 : _capacity * 2); + } + + //! @brief Realloc internal array to fit @a to items. + bool _realloc(sysuint_t to) ASMJIT_NOTHROW + { + ASMJIT_ASSERT(to >= _length); + + T* p = reinterpret_cast<T*>(_data + ? 
ASMJIT_REALLOC(_data, to * sizeof(T)) + : ASMJIT_MALLOC(to * sizeof(T))); + if (!p) return false; + + _data = p; + _capacity = to; + return true; + } + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + + //! @brief Items data. + T* _data; + //! @brief Length of buffer (count of items in array). + sysuint_t _length; + //! @brief Capacity of buffer (maximum items that can fit to current array). + sysuint_t _capacity; + +private: + ASMJIT_DISABLE_COPY(PodVector<T>) +}; + +// ============================================================================ +// [AsmJit::Zone] +// ============================================================================ + +//! @brief Memory allocator designed to fast alloc memory that will be freed +//! in one step. +//! +//! @note This is hackery for performance. Concept is that objects created +//! by @c Zone are freed all at once. This means that lifetime of +//! these objects are same as zone object itselt. +//! +//! All emittables, variables, labels and states allocated by @c Compiler are +//! allocated through @c Zone object. +struct ASMJIT_API Zone +{ + // -------------------------------------------------------------------------- + // [Construction / Destruction] + // -------------------------------------------------------------------------- + + //! @brief Create new instance of @c Zone. + //! @param chunkSize Default size for one zone chunk. + Zone(sysuint_t chunkSize) ASMJIT_NOTHROW; + + //! @brief Destroy zone instance. + ~Zone() ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Methods] + // -------------------------------------------------------------------------- + + //! @brief Allocate @c size bytes of memory and return pointer to it. + //! + //! Pointer allocated by this way will be valid until @c Zone object is + //! destroyed. 
To create class by this way use placement @c new and + //! @c delete operators: + //! + //! @code + //! // Example of allocating simple class + //! + //! // Your class + //! class Object + //! { + //! // members... + //! }; + //! + //! // Your function + //! void f() + //! { + //! // We are using AsmJit namespace + //! using namespace AsmJit + //! + //! // Create zone object with chunk size of 65536 bytes. + //! Zone zone(65536); + //! + //! // Create your objects using zone object allocating, for example: + //! Object* obj = new(zone.alloc(sizeof(YourClass))) Object(); + //! + //! // ... lifetime of your objects ... + //! + //! // Destroy your objects: + //! obj->~Object(); + //! + //! // Zone destructor will free all memory allocated through it, + //! // alternative is to call @c zone.freeAll(). + //! } + //! @endcode + void* zalloc(sysuint_t size) ASMJIT_NOTHROW; + + //! @brief Helper to duplicate string. + char* zstrdup(const char* str) ASMJIT_NOTHROW; + + //! @brief Free all allocated memory except first block that remains for reuse. + //! + //! Note that this method will invalidate all instances using this memory + //! allocated by this zone instance. + void clear() ASMJIT_NOTHROW; + + //! @brief Free all allocated memory at once. + //! + //! Note that this method will invalidate all instances using this memory + //! allocated by this zone instance. + void freeAll() ASMJIT_NOTHROW; + + //! @brief Get total size of allocated objects - by @c alloc(). + inline sysuint_t getTotal() const ASMJIT_NOTHROW { return _total; } + //! @brief Get (default) chunk size. + inline sysuint_t getChunkSize() const ASMJIT_NOTHROW { return _chunkSize; } + + // -------------------------------------------------------------------------- + // [Chunk] + // -------------------------------------------------------------------------- + + //! @internal + //! + //! @brief One allocated chunk of memory. + struct ASMJIT_HIDDEN Chunk + { + //! @brief Link to previous chunk. + Chunk* prev; + //! 
@brief Position in this chunk. + sysuint_t pos; + //! @brief Size of this chunk (in bytes). + sysuint_t size; + + //! @brief Data. + uint8_t data[sizeof(void*)]; + + //! @brief Get count of remaining (unused) bytes in chunk. + inline sysuint_t getRemainingBytes() const ASMJIT_NOTHROW { return size - pos; } + }; + + // -------------------------------------------------------------------------- + // [Members] + // -------------------------------------------------------------------------- + +private: + //! @brief Last allocated chunk of memory. + Chunk* _chunks; + //! @brief Total size of allocated objects - by @c alloc() method. + sysuint_t _total; + //! @brief One chunk size. + sysuint_t _chunkSize; +}; + +//! @} + +} // AsmJit namespace + +#endif // _ASMJIT_UTIL_H diff --git a/lib/AsmJit/Util_p.h b/lib/AsmJit/Util_p.h new file mode 100644 index 0000000..71c7f2d --- /dev/null +++ b/lib/AsmJit/Util_p.h @@ -0,0 +1,130 @@ +// AsmJit - Complete JIT Assembler for C++ Language. + +// Copyright (c) 2008-2010, Petr Kobalicek <kobalicek.petr@gmail.com> +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +// [Guard] +#ifndef _ASMJIT_UTIL_P_H +#define _ASMJIT_UTIL_P_H + +// [Dependencies] +#include "Util.h" + +#include <stdlib.h> +#include <string.h> + +namespace AsmJit { + +//! @addtogroup AsmJit_Util +//! @{ + +// ============================================================================ +// [AsmJit::Util] +// ============================================================================ + +namespace Util +{ + // -------------------------------------------------------------------------- + // [AsmJit::floatAsInt32, int32AsFloat] + // -------------------------------------------------------------------------- + + //! @internal + //! + //! @brief used to cast from float to 32-bit integer and vica versa. + union I32FPUnion + { + //! @brief 32-bit signed integer value. + int32_t i; + //! @brief 32-bit SP-FP value. + float f; + }; + + //! @internal + //! + //! @brief used to cast from double to 64-bit integer and vica versa. + union I64FPUnion + { + //! @brief 64-bit signed integer value. + int64_t i; + //! @brief 64-bit DP-FP value. + double f; + }; + + //! @brief Binary cast from 32-bit integer to SP-FP value (@c float). + static inline float int32AsFloat(int32_t i) ASMJIT_NOTHROW + { + I32FPUnion u; + u.i = i; + return u.f; + } + + //! @brief Binary cast SP-FP value (@c float) to 32-bit integer. + static inline int32_t floatAsInt32(float f) ASMJIT_NOTHROW + { + I32FPUnion u; + u.f = f; + return u.i; + } + + //! @brief Binary cast from 64-bit integer to DP-FP value (@c double). + static inline double int64AsDouble(int64_t i) ASMJIT_NOTHROW + { + I64FPUnion u; + u.i = i; + return u.f; + } + + //! @brief Binary cast from DP-FP value (@c double) to 64-bit integer. 
+ static inline int64_t doubleAsInt64(double f) ASMJIT_NOTHROW + { + I64FPUnion u; + u.f = f; + return u.i; + } + + // -------------------------------------------------------------------------- + // [Str Utils] + // -------------------------------------------------------------------------- + + ASMJIT_HIDDEN char* mycpy(char* dst, const char* src, sysuint_t len = (sysuint_t)-1) ASMJIT_NOTHROW; + ASMJIT_HIDDEN char* myfill(char* dst, const int c, sysuint_t len) ASMJIT_NOTHROW; + ASMJIT_HIDDEN char* myhex(char* dst, const uint8_t* src, sysuint_t len) ASMJIT_NOTHROW; + ASMJIT_HIDDEN char* myutoa(char* dst, sysuint_t i, sysuint_t base = 10) ASMJIT_NOTHROW; + ASMJIT_HIDDEN char* myitoa(char* dst, sysint_t i, sysuint_t base = 10) ASMJIT_NOTHROW; + + // -------------------------------------------------------------------------- + // [Mem Utils] + // -------------------------------------------------------------------------- + + static inline void memset32(uint32_t* p, uint32_t c, sysuint_t len) ASMJIT_NOTHROW + { + sysuint_t i; + for (i = 0; i < len; i++) p[i] = c; + } +} // Util namespace + +//! 
@} + +} // AsmJit namespace + +#endif // _ASMJIT_UTIL_P_H diff --git a/src/Experiment.h b/src/Experiment.h index 0089c2f..e90d55c 100644 --- a/src/Experiment.h +++ b/src/Experiment.h @@ -85,7 +85,7 @@ public: const static int32 DEFAULT_BYTES_PER_THREAD = DEFAULT_BYTES_PER_CHAIN * DEFAULT_CHAINS_PER_THREAD; const static int32 DEFAULT_THREADS = 1; const static int32 DEFAULT_BYTES_PER_TEST = DEFAULT_BYTES_PER_THREAD * DEFAULT_THREADS; - const static int32 DEFAULT_BUSY_CYCLES = 10; + const static int32 DEFAULT_BUSY_CYCLES = 0; const static int32 DEFAULT_SECONDS = 1; const static int32 DEFAULT_ITERATIONS = 0; const static int32 DEFAULT_EXPERIMENTS = 1; diff --git a/src/Run.cpp b/src/Run.cpp index c5026e9..1075554 100644 --- a/src/Run.cpp +++ b/src/Run.cpp @@ -12,6 +12,8 @@ #include <stdio.h> #include <stdlib.h> #include <unistd.h> +#include <stddef.h> +#include <vector> #if defined(NUMA) #include <numa.h> @@ -19,21 +21,24 @@ #include "Run.h" +#include <AsmJit/AsmJit.h> + #include "Chain.h" #include "Timer.h" #include "SpinBarrier.h" static double max(double v1, double v2); static double min(double v1, double v2); -static void chase_pointers(int64 chains_per_thread, int64 iterations, - Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, - int64 busy_cycles, bool prefetch); -static void follow_streams(int64 chains_per_thread, int64 iterations, - Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, - int64 busy_cycles, bool prefetch); -static void (*run_benchmark)(int64 chains_per_thread, int64 iterations, - Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, - int64 busy_cycles, bool prefetch) = chase_pointers; +typedef void (*benchmark)(const Chain**); +typedef benchmark (*generator)(int64 chains_per_thread, + int64 bytes_per_line, int64 bytes_per_chain, + int64 stride, int64 busy_cycles, bool prefetch); +static benchmark chase_pointers(int64 chains_per_thread, + int64 bytes_per_line, int64 bytes_per_chain, + 
int64 stride, int64 busy_cycles, bool prefetch); +static benchmark follow_streams(int64 chains_per_thread, + int64 bytes_per_line, int64 bytes_per_chain, + int64 stride, int64 busy_cycles, bool prefetch); Lock Run::global_mutex; int64 Run::_ops_per_chain = 0; @@ -85,48 +90,40 @@ int Run::run() { #endif // initialize the chains and - // select the function that + // compile the function that // will execute the tests + generator gen; for (int i = 0; i < this->exp->chains_per_thread; i++) { if (this->exp->access_pattern == Experiment::RANDOM) { root[i] = random_mem_init(chain_memory[i]); - run_benchmark = chase_pointers; + gen = chase_pointers; } else if (this->exp->access_pattern == Experiment::STRIDED) { if (0 < this->exp->stride) { root[i] = forward_mem_init(chain_memory[i]); } else { root[i] = reverse_mem_init(chain_memory[i]); } - run_benchmark = chase_pointers; + gen = chase_pointers; } else if (this->exp->access_pattern == Experiment::STREAM) { root[i] = stream_mem_init(chain_memory[i]); - run_benchmark = follow_streams; + gen = follow_streams; } } - // Calculate the amount of NOP's to hit the requested processing cycles - // TODO: this shouldn't be dynamically counted in the chase_pointers - // method, but rather compiled dynamically? 
- int64 nops = (this->exp->busy_cycles - 1) / 7; - // Assembler for loop - // initialize counter -- 1 initialization instruction - // nop -\ - // add one to counter | - // move counter to register | - // compare register to upper limit | 7 instuctions per iteration - // set compare byte if still less | - // test the compare byte | - // jump depending on the test output -/ - if (nops < 0) - nops = 0; - if (this->exp->iterations <= 0) { + // compile benchmark + benchmark bench = gen(this->exp->chains_per_thread, + this->exp->bytes_per_line, this->exp->bytes_per_chain, + this->exp->stride, this->exp->busy_cycles, + this->exp->prefetch); + volatile static double istart = 0; volatile static double istop = 0; volatile static double elapsed = 0; volatile static int64 iters = 1; volatile double bound = max(0.2, 10 * Timer::resolution()); for (iters = 1; elapsed <= bound; iters = iters << 1) { + // barrier this->bp->barrier(); // start timer @@ -136,10 +133,8 @@ int Run::run() { this->bp->barrier(); // chase pointers - run_benchmark(this->exp->chains_per_thread, iters, root, - this->exp->bytes_per_line, this->exp->bytes_per_chain, - this->exp->stride, nops, - this->exp->prefetch); + for (int i = 0; i < iters; i++) + bench((const Chain**) root); // barrier this->bp->barrier(); @@ -166,8 +161,14 @@ int Run::run() { #if defined(UNDEFINED) #endif - // barrier + // compile benchmark + benchmark bench = gen(this->exp->chains_per_thread, + this->exp->bytes_per_line, this->exp->bytes_per_chain, + this->exp->stride, this->exp->busy_cycles, + this->exp->prefetch); + for (int e = 0; e < this->exp->experiments; e++) { + // barrier this->bp->barrier(); // start timer @@ -177,9 +178,8 @@ int Run::run() { this->bp->barrier(); // chase pointers - run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root, - this->exp->bytes_per_line, this->exp->bytes_per_chain, - this->exp->stride, nops, this->exp->prefetch); + for (int i = 0; i < this->exp->iterations; i++) + bench((const 
Chain**) root); // barrier this->bp->barrier(); @@ -369,594 +369,83 @@ void mem_chk(Chain *m) { dumb_ck += 1; } -static void chase_pointers(int64 chains_per_thread, // memory loading per thread - int64 iterations, // number of iterations per experiment - Chain** root, // root(s) of the chain(s) to follow +static benchmark chase_pointers(int64 chains_per_thread, // memory loading per thread int64 bytes_per_line, // ignored int64 bytes_per_chain, // ignored int64 stride, // ignored int64 busy_cycles, // processing cycles bool prefetch // prefetch? ) { - // chase pointers - switch (chains_per_thread) { - default: - case 1: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - do { - a = a->next; - if (prefetch) - prefetch(a->next); - asm("nop"); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - asm("nop"); - } while (a != root[0]); - mem_chk(a); - } - break; - case 2: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - do { - a = a->next; - b = b->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - } - break; - case 3: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - do { - a = a->next; - b = b->next; - c = c->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - } - break; - case 4: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - mem_chk(d); - } - break; - case 5: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; 
- Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - mem_chk(d); - mem_chk(e); - } - break; - case 6: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - mem_chk(d); - mem_chk(e); - mem_chk(f); - } - break; - case 7: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - mem_chk(d); - mem_chk(e); - mem_chk(f); - mem_chk(g); - } - break; - case 8: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - mem_chk(d); - mem_chk(e); - mem_chk(f); - mem_chk(g); - 
mem_chk(h); - } - break; - case 9: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - mem_chk(d); - mem_chk(e); - mem_chk(f); - mem_chk(g); - mem_chk(h); - mem_chk(j); - } - break; - case 10: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - mem_chk(d); - mem_chk(e); - mem_chk(f); - mem_chk(g); - mem_chk(h); - mem_chk(j); - mem_chk(k); - } - break; - case 11: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - 
mem_chk(c); - mem_chk(d); - mem_chk(e); - mem_chk(f); - mem_chk(g); - mem_chk(h); - mem_chk(j); - mem_chk(k); - mem_chk(l); - } - break; - case 12: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - mem_chk(d); - mem_chk(e); - mem_chk(f); - mem_chk(g); - mem_chk(h); - mem_chk(j); - mem_chk(k); - mem_chk(l); - mem_chk(m); - } - break; - case 13: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - mem_chk(d); - mem_chk(e); - mem_chk(f); - mem_chk(g); - mem_chk(h); - mem_chk(j); - mem_chk(k); - mem_chk(l); - mem_chk(m); - mem_chk(n); - } - break; - case 14: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g 
= root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - Chain* o = root[13]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - o = o->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - mem_chk(d); - mem_chk(e); - mem_chk(f); - mem_chk(g); - mem_chk(h); - mem_chk(j); - mem_chk(k); - mem_chk(l); - mem_chk(m); - mem_chk(n); - mem_chk(o); - } - break; - case 15: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - Chain* o = root[13]; - Chain* p = root[14]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - o = o->next; - p = p->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - mem_chk(d); - mem_chk(e); - mem_chk(f); - mem_chk(g); - mem_chk(h); - mem_chk(j); - mem_chk(k); - mem_chk(l); - mem_chk(m); - mem_chk(n); - mem_chk(o); - mem_chk(p); - } - break; - case 16: - for (int64 i = 0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - 
Chain* n = root[12]; - Chain* o = root[13]; - Chain* p = root[14]; - Chain* q = root[15]; - do { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - o = o->next; - p = p->next; - q = q->next; - if (prefetch) - prefetch(a->next); - for (int64 j = 0; j < busy_cycles; j++) - asm("nop"); - } while (a != root[0]); - mem_chk(a); - mem_chk(b); - mem_chk(c); - mem_chk(d); - mem_chk(e); - mem_chk(f); - mem_chk(g); - mem_chk(h); - mem_chk(j); - mem_chk(k); - mem_chk(l); - mem_chk(m); - mem_chk(n); - mem_chk(o); - mem_chk(p); - mem_chk(q); - } + // Create Compiler. + AsmJit::Compiler c; + + // Tell compiler the function prototype we want. It allocates variables representing + // function arguments that can be accessed through Compiler or Function instance. + c.newFunction(AsmJit::CALL_CONV_DEFAULT, AsmJit::FunctionBuilder1<AsmJit::Void, const Chain**>()); + + // Try to generate function without prolog/epilog code: + c.getFunction()->setHint(AsmJit::FUNCTION_HINT_NAKED, true); + + // Create labels. + AsmJit::Label L_Loop = c.newLabel(); + + // Function arguments. + AsmJit::GPVar chain(c.argGP(0)); + + // Save the head + std::vector<AsmJit::GPVar> heads(chains_per_thread); + for (int i = 0; i < chains_per_thread; i++) { + AsmJit::GPVar head = c.newGP(); + c.mov(head, ptr(chain)); + heads[i] = head; + } + + // Current position + std::vector<AsmJit::GPVar> positions(chains_per_thread); + for (int i = 0; i < chains_per_thread; i++) { + AsmJit::GPVar position = c.newGP(); + c.mov(position, heads[0]); + positions[i] = position; } + + // Loop. 
+ c.bind(L_Loop); + + // Process all links + for (int i = 0; i < chains_per_thread; i++) { + // Chase pointer + c.mov(positions[i], ptr(positions[i], offsetof(Chain, next))); + + // Prefetch next + // TODO + } + + // Wait + for (int i = 0; i < busy_cycles; i++) + c.nop(); + + // Test if end reached + c.cmp(heads[0], positions[0]); + c.jne(L_Loop); + + // Finish. + c.endFunction(); + + // Make JIT function. + benchmark fn = AsmJit::function_cast<benchmark>(c.make()); + + // Ensure that everything is ok. + if (!fn) { + printf("Error making jit function (%u).\n", c.getError()); + return 0; + } + + return fn; } // NOT WRITTEN YET -- DMP // JUST A PLACE HOLDER! -Chain* -Run::stream_mem_init(Chain *mem) { +Chain* Run::stream_mem_init(Chain *mem) { // fprintf(stderr, "made it into stream_mem_init.\n"); // fprintf(stderr, "chains_per_thread = %ld\n", this->exp->chains_per_thread); // fprintf(stderr, "iterations = %ld\n", this->exp->iterations); @@ -990,15 +479,15 @@ void sum_chk(double t) { // NOT WRITTEN YET -- DMP // JUST A PLACE HOLDER! -static void follow_streams(int64 chains_per_thread, // memory loading per thread - int64 iterations, // number of iterations per experiment - Chain** root, // root(s) of the chain(s) to follow +static benchmark follow_streams(int64 chains_per_thread, // memory loading per thread int64 bytes_per_line, // ignored int64 bytes_per_chain, // ignored int64 stride, // ignored int64 busy_cycles, // ignored bool prefetch // ignored ) { + return 0; + /* int64 refs_per_line = bytes_per_line / sizeof(double); int64 refs_per_chain = bytes_per_chain / sizeof(double); @@ -1298,4 +787,5 @@ static void follow_streams(int64 chains_per_thread, // memory loading per thread } break; } + */ } |