amd64
also called x86-64
registers
16 general purpose registers + instruction pointer.
x86-32 - 32bit/4byte wide
x86-64 - 64bit/8byte wide
naming convention


rflags
eflags extended to 64 bits, but the extended space is not used.
status flags
zero flag (6) - set if the result of some instruction is zero
sign flag (7) - set equal to the most significant bit of the result which is the sign. 1=neg 0=pos
c data type size
char -> 1 byte
short -> 2 bytes -> word
int/long -> 4 bytes -> double word
double/long long -> 8 bytes -> quad word
usage convention (intel)
intel's suggestion of usage of registers:
rax - return value of functions (a=accumulator)
rbx - base pointer to data section (b=base)
rcx - can be used for loops (c=counter)
rdx - i/o pointer (d=data)
rsi - points to string for string operation (si=string index)
rdi - points to string for string operation (di=destination index)
rsp - stack pointer: points to top of the stack
rbp - base pointer: maintains base of a function's stack frame
rip - instruction pointer - points to next instruction
reading manual
r/mX
; operand has form `r/mX` which means you can specify register or memory, of size x bits
; r/m16
; r/m32
; r/m64
; operand can be specified in 4 forms:
rax ; -> value in rax
[rax] ; -> dererence address in rax, and get the value
[rax + rcs * x]
[rax + rcs * x + Y]
; memory descriptors are used to indicate the size of data being operated on
mov qword ptr [rsp], rax
immX
; operand can specify immediate values of x bit
; imm64
; imm32
; imm16
0x1234
syntax
; intel
; destination <- source
add rax, 0x20
; at&t
; source -> destination
add $0x20, %rax
instructions
nop
; no operation.
; does nothing.
; is an alias for `xchg eax, eax` which also does nothing.
; opcode - 0x90, but can also be multi byte.
nop
push
push operand
; push quadword (8 byte) to stack.
; stack pointer decrements by 8
; operaand can be r/m16 r/m32 r/m64
push rax
push [rax]
push [rax + rcx * x] ; x = 1 2 4 8
; operand can be imm8 imm16 imm32
push 0xdeadbeef
pop
pop operand
; increments rsp by 8
; operand can be r/mX
pop rax
pop [rax]
call
; transfer and resume control
; pushes the next address on stack and jump to `operand`
; destination address can be:
; absolute address
; relative address
call 0xdeadbeef
ret
; return from prodecure
ret ; pop the top of the stack to rip
ret 0x20 ; pop the top of the stack to rip and add 0x20 bytes to rsp
mov
; register <-> register
mov rax, rbx
; memory <-> register
mov rax, [rbx]
mov rax, [rbx + rcx * 12]
mov [rbx], rax
mov [rbx + rcx * 12], rax
; memory <- immediate | only supports imm32
mov [rbx + rcx * 12], 0xdeadbeef
; register <- immediate
mov rax, 0xdeadbeef
; does not move memory to memory
; mov can only move imm32 to memory.
add & sub
; adds or substracts value
; destination can be `r/mX`
; source can be `r/mX` or `immX`
; except memory to memory
add rax, 0x10
sub rax, [rbx + rcx * 12]
multiplication
; imul
; signed multiply
; three forms
; one operand
imul r/m8 ; ax = al * r/m8
imul r/m16 ; dx:ax = ax * r/m16
imul r/m32 ; edx:eax = eax * r/m32
imul r/m64 ; rdx:rax = rax * r/m64
; two operand
; truncation is possible
; r16 = r16 * r/m16
; r32 = r32 * r/m32
; r64 = r64 * r/m64
imul reg, r/mX
; three operand
imul reg, r/mX, imm
; r16 = r/m16 * (8-bit immediate sign-extended to 16 bits)
; r32 = r/m32 * (8-bit immediate sign-extended to 32 bits)
; r64 = r/m64 * (8-bit immediate sign-extended to 64 bits)
; r16 = r/m16 * 16-bit immediate
; r32 = r/m32 * 32-bit immediate
: r64 = r/m64 * (32-bit immediate sign-extended to 64 bits)
division
; div
; unsigned divide
; divide ax by r/m8 | al -> quotient | ah -> remainder
div cl ; disassemblers might specify as div ax, cx
; divide dx:ax by r/m16 | ax -> quotient | dx -> remainder
div cx ; disassemblers might specify as div ax, cx
; divide edx:eax by r/m32 | eax -> quotient | edx -> remainder
div ecx ; disassemblers might specify as div eax, ecx
; deivide rdx:rax by r/m64 | rax -> quotient | rdx -> remainder
div rcx ; disassemblers might specify as div rax, rcx
; idiv
; signed divide, same form as div
movzx
; move with zero extend.
; usefull for moving small values to large values, treating values as unsinged
; fills high order bits with zero
mov eax, 0xf00dface
movzx rbx, eax ; rbx = 0x00000000f00dface
movsx
; move with sign extend.
; use for moving small values to large values, treading values as signed.
; eg; moving 4 bytes -1 to 8 bytes -1 preserving signness.
; sign extends only 8 or 16 bit values (byte or word)
movsxd
; move 32 bit signed value (double word) to 64 bit (quadword) value
mov eax, 0xf00dface
movsxd rbx, eax ; rbx = 0xfffffffff00dface
lea
; load effective address
; used for pointer arithmetic
; the only instruction where `[]` does not mean dereference
lea rax, [rdx + rbx * 8 + 5]
; calculate value inside the square bracket and stored value in rax without dereferencing
jmp
; unconditional jump
; change rip to given location
; short/relative jump
jmp displacement ; rip = rip of next instruction + 1 byte sign extend to 64 bits displacement
jmp address ; is just encoded as jmp n bytes forward for example
cmp
; comparison instruction
; the second operand is subtracted from the first operand and status flag is set accordingly
; different from subtract because the result is not stored anywhere, unlike sub
jcc
; conditional jump
jz/je ; zf=1
jnz/jne ; zf=0
jle/jng ; zf=1 or sf!=of
jge/jnl ; sf=of
jbe/jna ; cf=1 or zf=1
jb ; cf=1
js ; sf=1
jns ; sf=0
; a = above | unsigned notion | 0xff is above 0x00
; b = below | unsigned notion | 0x00 is below 0xff
; g = greater | signed notion | 0x00 is greater than 0xff
; l = less | singed notion | 0xff is less than 0x00
; e = equal | same as zero flag set
; n = not
cmp dword ptr [rsp+4], eax
; jump if value at [rsp+4] is not equal to eax
jne 0xdeadbeef
; jump if value at [rsp+4] is less than or equal to signed value in eax
jle 0xdeadbeef
; jump if value at [rsp+4] is greater than or equal to unsigned value in eax
jge 0xdeadbeef
boolean instructions
; all boolean instructions set the status flag accordingly
; bitwise and
and al, bl
and al, 0x42
; bitwise or
or al, bl
or al, 0x42
; xor - exclusive or
xor eax, ebx
xor eax, eax ; zero out a register
; not
not rax
inc/dec
; operand can be r/mX
; increase or decrease value by 1
; modifies status flag
inc rax
dec rbx
test
; calcualte bitwise and, sets the status flags and discard the result.
; similar to cmp but for and operation.
text eax, ebx
; usually used to check if register is zero
; jump to 0xdeadbeef if eax is zero
test eax, eax
jz 0xdeadbeef
shift instructions
; shl
; shift left (<< in C)
; first operand -> r/mX
; second operand cl | imm8
; sets carry flag if bits go beyond the size
; another of multiplying with power of 2
shl bl, 2 ; multiply bl with 2^2
shl bl, 4 ; multiply bl with 2^4
; shr
; shift right (>> in C)
; first operand -> r/mX
; second operand cl | imm8
; sets carry flag if bits go beyond the size
; another of dividing with power of 2
shr bl, 2 ; dividing by 2^2
shr bl, 4 ; dividing by 2^4
; sar
; shift arithmetic right for signed values
; fills the new shifted bits with whatever the MSb is
; first operand -> r/mX
; second operand cl | imm8
; bl = 10110011
sar bl, 2
; bl = 11101100
; sal
; shift arithmetic left
; same as shl
sal bl, 2
rep instructions
; all rep instruction uses *cx as a counter
; rep stos
; repeat store string
; fills 1/2/4/8 bytes at [rdi] with al/ax/eax/rax rcx times
; rep movs
; repeat move string; assembly equivalent of memcpy
; copies 1/2/4/8 at a time from [*si] to [*di]
rep movs dword ptr [rdi], dword ptr [rsi]
; the direction flag specifies the direction in which copy occurs
; based on the direction, ri and di increments or decrements
register convention
caller save register
register belongs to the callee
caller must save the register, assuming it will be changed by the callee.
; visual studio
rax, rcx, rdx, r8, r9, r10, r11
; gcc
rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11
callee save register
register belongs to the caller.
hence, callee must save those register and restore them before returning.
; visual studio
rbx, rbp, rdi, rsi, r12, r13, r14, r15
; gcc
rbx, rbp, r12, r13, r14, r15
calling conventions
compilers use a subset of caller save registers for passing arguments in and out of the function.
rax is used to return values from function given the value is 64 bytes or less.
ms x64 abi
int function(
int a, // rcx
int b, // rdx
int c, // r8
int d, // r9
int e, // pushed on stack
int f // pushed on stack
) { .. }
in most cases, ms abi does not use frame pointer. no ebp register to maintain base of stack frame.
if space is dynamically allocated using
_alloca()
, then frame pointers is used to mark base of the stack frame.ms document doesn't say that it has to be
ebp
ms shadow store
ms uses 4 register fall call calling convention by default.
the caller must allocate space to accommodate 4 function parameter. this is called shadow store.
parameter beyond the first 4, should be pushed on the stack before calling the function. the first four is then passed through 4 registers. the callee, can use the space on the shadow store to save the first four register.
this is the reason by why a simple function with no variable, by default reserves a space for 0x28 bytes (16 byte padding for return alignment and 4 8 byte chunk of shadow store)
eg for 6 parameters passed
caller allocated shadow store for 4 parameters
caller places the last two parameters on the stack
callee passes first 4 parameter over registers
callee moves value from these 4 register to shadow store
system V x86-64 abi
int function(
int a, // rdi
int b, // rsi
int c, // rdx
int d, // rcx
int e, // r8
int f, // r9
int g, // stack
int h, // stack
) { .. }
frame pointers are used to mark the base of the stack frame using
rbp
register. can be disabled.
push rbp
mov rbp, rsp
...
...
pop rbp
ret
32-bit calling convention
cdecl
default in most c code
caller cleans up the stack
stdcall
used in wind32 apis
callee cleans up the stack
function parameters are pushed on the stack from right to left.
both these calling convention uses something called stack frame pointer.
this uses ebp register to maintain a stack base pointer
writing assembly
inline assembly
gcc
inline assembly is supported in gcc using the gas syntax
visual studio
inline assembly was supported in x32 but not anymore in x64. but a some instructin can be used in c code using visual studio instrinsics.
// they look like function calls but compiler translate them to literal assembly instructions
// fuctions like __stosX
standalone assembly
GCC
AS
GNU assembler, integrates with gcc, ld etc.
uses at&t syntax by default
.intel_syntax noprefix
.global _start ; tells linked where to find start execution
_start: _start is a label program execute from here
mov rax, 60
mov rdi, 42
syscall
can be compiled with
as
and linked withld
as code.s -o code.o
ld asm.o -o code
./code
all of this can be simply done directly by gcc
gcc -nostdlib code.s -o code
./code
MASM
microsoft assembler, used with visual studio
PUBLIC asm_function
.code
asm_function PROC
mov rax, 1
ret
asm_function ENDP
end
NASM - netwide assembler - cross platform
Last updated