// ish/jit/gen.c

#include <assert.h>
#include "jit/gen.h"
#include "emu/modrm.h"
#include "emu/cpuid.h"
#include "emu/fpu.h"
#include "emu/sse.h"
#include "emu/interrupt.h"
static void gen(struct gen_state *state, unsigned long thing) {
    assert(state->size <= state->capacity);
    if (state->size >= state->capacity) {
        state->capacity *= 2;
        struct jit_block *bigger_block = realloc(state->block,
                sizeof(struct jit_block) + state->capacity * sizeof(unsigned long));
        if (bigger_block == NULL) {
            die("out of memory while jitting");
        }
        state->block = bigger_block;
    }
    assert(state->size < state->capacity);
    state->block->code[state->size++] = thing;
}
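
// Begin generating a block for the guest code starting at addr: reset the generation state and
// allocate a fresh jit_block with the initial capacity.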
void gen_start(addr_t addr, struct gen_state *state) {
    state->capacity = JIT_BLOCK_INITIAL_CAPACITY;
    state->size = 0;
    state->ip = addr;
    for (int i = 0; i <= 1; i++) {
        state->jump_ip[i] = 0;
    }
    state->block_patch_ip = 0;

    struct jit_block *block = malloc(sizeof(struct jit_block) + state->capacity * sizeof(unsigned long));
    state->block = block;
    block->addr = addr;
}
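
// Finish the block: turn the recorded jump_ip offsets into pointers into the code array (keeping
// the original values in old_jump_ip), patch the CALL placeholder with the block's own address,
// record the block's end address, and initialize its list links.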
void gen_end(struct gen_state *state) {
    struct jit_block *block = state->block;
    for (int i = 0; i <= 1; i++) {
        if (state->jump_ip[i] != 0) {
            block->jump_ip[i] = &block->code[state->jump_ip[i]];
            block->old_jump_ip[i] = *block->jump_ip[i];
        } else {
            block->jump_ip[i] = NULL;
        }
        list_init(&block->jumps_from[i]);
        list_init(&block->jumps_from_links[i]);
    }
    if (state->block_patch_ip != 0) {
        block->code[state->block_patch_ip] = (unsigned long) block;
    }
    if (block->addr != state->ip)
        block->end_addr = state->ip - 1;
    else
        block->end_addr = block->addr;
    list_init(&block->chain);
    block->is_jetsam = false;
    for (int i = 0; i <= 1; i++) {
        list_init(&block->page[i]);
    }
}
void gen_exit(struct gen_state *state) {
    extern void gadget_exit(void);
    // in case the last instruction didn't end the block
    gen(state, (unsigned long) gadget_exit);
    gen(state, state->ip);
}
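
// The macros below are the code-generation interface consumed by the shared decoder in
// emu/decode.h (included at the bottom of this file). DECLARE_LOCALS/FINISH bracket each decoded
// instruction; _READIMM, READMODRM, and READADDR fetch instruction bytes through the TLB.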
#define DECLARE_LOCALS \
    dword_t saved_ip = state->ip; \
    dword_t addr_offset = 0; \
    bool end_block = false; \
    bool seg_gs = false

#define FINISH \
    return !end_block

#define RESTORE_IP state->ip = saved_ip
#define _READIMM(name, size) \
    if (!tlb_read(tlb, state->ip, &name, size/8)) SEGFAULT; \
    state->ip += size/8

#define READMODRM if (!modrm_decode32(&state->ip, tlb, &modrm)) SEGFAULT
#define READADDR _READIMM(addr_offset, 32)
#define SEG_GS() seg_gs = true

// This should stay in sync with the definition of .gadget_array in gadgets.h
enum arg {
    arg_reg_a, arg_reg_c, arg_reg_d, arg_reg_b, arg_reg_sp, arg_reg_bp, arg_reg_si, arg_reg_di,
    arg_imm, arg_mem, arg_addr, arg_gs,
    arg_count, arg_invalid,
    // the following should not be synced with the list mentioned above (no gadgets implement them)
    arg_modrm_val, arg_modrm_reg,
    arg_xmm_modrm_val, arg_xmm_modrm_reg,
    arg_mem_addr, arg_1,
};

enum size {
    size_8, size_16, size_32,
    size_count,
    size_64, size_80, size_128, // bonus sizes
};

// sync with COND_LIST in control.S
enum cond {
    cond_O, cond_B, cond_E, cond_BE, cond_S, cond_P, cond_L, cond_LE,
    cond_count,
};

enum repeat {
    rep_once, rep_repz, rep_repnz,
    rep_count,
    rep_rep = rep_repz,
};

typedef void (*gadget_t)(void);
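
// Gadget emission helpers: g() emits the address of a gadget routine, gg()..gggggg() append one
// to five literal argument words after it, and ga()/gag()/gagg() index into a gadget array,
// failing with UNDEFINED if no gadget exists for that entry. h()/hh()/hhh() emit a call to a C
// helper via the helper_* gadgets; h_read/h_write pair the address computation (g_addr) with a
// read or write helper gadget of the given size.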
#define GEN(thing) gen(state, (unsigned long) (thing))
#define g(g) do { extern void gadget_##g(void); GEN(gadget_##g); } while (0)
#define gg(_g, a) do { g(_g); GEN(a); } while (0)
#define ggg(_g, a, b) do { g(_g); GEN(a); GEN(b); } while (0)
#define gggg(_g, a, b, c) do { g(_g); GEN(a); GEN(b); GEN(c); } while (0)
#define ggggg(_g, a, b, c, d) do { g(_g); GEN(a); GEN(b); GEN(c); GEN(d); } while (0)
#define gggggg(_g, a, b, c, d, e) do { g(_g); GEN(a); GEN(b); GEN(c); GEN(d); GEN(e); } while (0)
#define ga(g, i) do { extern gadget_t g##_gadgets[]; if (g##_gadgets[i] == NULL) UNDEFINED; GEN(g##_gadgets[i]); } while (0)
#define gag(g, i, a) do { ga(g, i); GEN(a); } while (0)
#define gagg(g, i, a, b) do { ga(g, i); GEN(a); GEN(b); } while (0)
#define gz(g, z) ga(g, sz(z))
#define h(h) gg(helper_0, h)
#define hh(h, a) ggg(helper_1, h, a)
#define hhh(h, a, b) gggg(helper_2, h, a, b)
#define h_read(h, z) do { g_addr(); gg_here(helper_read##z, h##z); } while (0)
#define h_write(h, z) do { g_addr(); gg_here(helper_write##z, h##z); } while (0)
#define gg_here(g, a) ggg(g, a, saved_ip)
#define UNDEFINED do { gg_here(interrupt, INT_UNDEFINED); return false; } while (0)
#define SEGFAULT do { gg_here(interrupt, INT_GPF); return false; } while (0)
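
// Map an operand size in bits (8/16/32) to its size_* enum index; returns -1 for other sizes.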
static inline int sz(int size) {
    switch (size) {
        case 8: return size_8;
        case 16: return size_16;
        case 32: return size_32;
        default: return -1;
    }
}
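
// Emit the gadgets that compute a modrm effective address: the base register (or a bare
// displacement), an optional scaled index, and an optional gs segment override.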
bool gen_addr(struct gen_state *state, struct modrm *modrm, bool seg_gs, dword_t saved_ip) {
    if (modrm->base == reg_none)
        gg(addr_none, modrm->offset);
    else
        gag(addr, modrm->base, modrm->offset);
    if (modrm->type == modrm_mem_si)
        ga(si, modrm->index * 4 + modrm->shift);
    if (seg_gs)
        g(seg_gs);
    return true;
}
#define g_addr() gen_addr(state, &modrm, seg_gs, saved_ip)
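
// Emit the gadget for a single operand: resolve the decoder-level arg (modrm reg/rm, absolute
// address, constant 1) to a concrete gadget index, emit the address computation for memory
// operands, then emit the gadget followed by its immediate, or by the saved ip so a faulting
// memory access can be reported at the right instruction.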
// this really wants to use all the locals of the decoder, which we can do
// really nicely in gcc using nested functions, but that won't work in clang,
// so we explicitly pass 500 arguments. sorry for the mess
static inline bool gen_op(struct gen_state *state, gadget_t *gadgets, enum arg arg, struct modrm *modrm, uint64_t *imm, int size, dword_t saved_ip, bool seg_gs, dword_t addr_offset) {
    size = sz(size);
    gadgets = gadgets + size * arg_count;

    switch (arg) {
        case arg_modrm_reg:
            // TODO find some way to assert that this won't overflow?
            arg = modrm->reg + arg_reg_a; break;
        case arg_modrm_val:
            if (modrm->type == modrm_reg)
                arg = modrm->base + arg_reg_a;
            else
                arg = arg_mem;
            break;
        case arg_mem_addr:
            arg = arg_mem;
            modrm->type = modrm_mem;
            modrm->base = reg_none;
            modrm->offset = addr_offset;
            break;
        case arg_1:
            arg = arg_imm;
            *imm = 1;
            break;
    }
    if (arg >= arg_count || gadgets[arg] == NULL) {
        UNDEFINED;
    }
    if (arg == arg_mem || arg == arg_addr) {
        if (!gen_addr(state, modrm, seg_gs, saved_ip))
            return false;
    }
    GEN(gadgets[arg]);
    if (arg == arg_imm)
        GEN(*imm);
    else if (arg == arg_mem)
        GEN(saved_ip);
    return true;
}
#define op(type, thing, z) do { \
    extern gadget_t type##_gadgets[]; \
    if (!gen_op(state, type##_gadgets, arg_##thing, &modrm, &imm, z, saved_ip, seg_gs, addr_offset)) return false; \
} while (0)
#define load(thing, z) op(load, thing, z)
#define store(thing, z) op(store, thing, z)
// load-op-store
#define los(o, src, dst, z) load(dst, z); op(o, src, z); store(dst, z)
#define lo(o, src, dst, z) load(dst, z); op(o, src, z)
#define MOV(src, dst,z) load(src, z); store(dst, z)
#define MOVZX(src, dst,zs,zd) load(src, zs); gz(zero_extend, zs); store(dst, zd)
#define MOVSX(src, dst,zs,zd) load(src, zs); gz(sign_extend, zs); store(dst, zd)
// xchg must generate in this order to be atomic
#define XCHG(src, dst,z) load(src, z); op(xchg, dst, z); store(src, z)
#define ADD(src, dst,z) los(add, src, dst, z)
#define OR(src, dst,z) los(or, src, dst, z)
#define ADC(src, dst,z) los(adc, src, dst, z)
#define SBB(src, dst,z) los(sbb, src, dst, z)
#define AND(src, dst,z) los(and, src, dst, z)
#define SUB(src, dst,z) los(sub, src, dst, z)
#define XOR(src, dst,z) los(xor, src, dst, z)
#define CMP(src, dst,z) lo(sub, src, dst, z)
#define TEST(src, dst,z) lo(and, src, dst, z)
#define NOT(val,z) load(val,z); gz(not, z); store(val,z)
#define NEG(val,z) imm = 0; load(imm,z); op(sub, val,z); store(val,z)
#define POP(thing,z) gg(pop, saved_ip); store(thing, z)
#define PUSH(thing,z) load(thing, z); gg(push, saved_ip)
#define INC(val,z) load(val, z); gz(inc, z); store(val, z)
#define DEC(val,z) load(val, z); gz(dec, z); store(val, z)
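
// Guest addresses passed to the jump/call gadgets are tagged with the top bit set (fake_ip).
// jump_ips records the offsets of those operand slots so gen_end() can expose them through
// block->jump_ip, presumably so the jit can later patch them to point directly at the target
// block (chaining).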
#define fake_ip (state->ip | (1ul << 63))
#define jump_ips(off1, off2) \
    state->jump_ip[0] = state->size + off1; \
    if (off2 != 0) \
        state->jump_ip[1] = state->size + off2
#define JMP(loc) load(loc, OP_SIZE); g(jmp_indir); end_block = true
#define JMP_REL(off) gg(jmp, fake_ip + off); jump_ips(-1, 0); end_block = true
#define JCXZ_REL(off) ggg(jcxz, fake_ip + off, fake_ip); jump_ips(-2, -1); end_block = true
#define jcc(cc, to, else) gagg(jmp, cond_##cc, to, else); jump_ips(-2, -1); end_block = true
#define J_REL(cc, off) jcc(cc, fake_ip + off, fake_ip)
#define JN_REL(cc, off) jcc(cc, fake_ip, fake_ip + off)
// Arguments to the call gadgets:
// saved_ip: for use by the page fault handler;
// -1: placeholder, patched to the block address in gen_end();
// first fake_ip: the return address, saved to the stack and used to verify the cached ip in the return cache;
// second fake_ip: the return target, patchable by return chaining.
#define CALL(loc) do { \
    load(loc, OP_SIZE); \
    ggggg(call_indir, saved_ip, -1, fake_ip, fake_ip); \
    state->block_patch_ip = state->size - 3; \
    jump_ips(-1, 0); \
    end_block = true; \
} while (0)
// the first four arguments are the same as for CALL;
// the last one is the call target, patchable by chaining.
#define CALL_REL(off) do { \
    gggggg(call, saved_ip, -1, fake_ip, fake_ip, fake_ip + off); \
    state->block_patch_ip = state->size - 4; \
    jump_ips(-2, -1); \
    end_block = true; \
} while (0)
#define RET_NEAR(imm) ggg(ret, saved_ip, 4 + imm); end_block = true
#define INT(code) ggg(interrupt, (uint8_t) code, state->ip); end_block = true
#define SET(cc, dst) ga(set, cond_##cc); store(dst, 8)
#define SETN(cc, dst) ga(setn, cond_##cc); store(dst, 8)
// wins the prize for the most annoying instruction to generate
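// skip/skipn emit a conditional-skip gadget with a placeholder count; once the load/store has
// been generated, the placeholder (the word just before `start`) is patched with the number of
// bytes to skip when the move should not happen.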
#define CMOV(cc, src, dst,z) do { \
    gag(skipn, cond_##cc, 0); \
    int start = state->size; \
    load(src, z); store(dst, z); \
    state->block->code[start - 1] = (state->size - start) * sizeof(long); \
} while (0)
#define CMOVN(cc, src, dst,z) do { \
    gag(skip, cond_##cc, 0); \
    int start = state->size; \
    load(src, z); store(dst, z); \
    state->block->code[start - 1] = (state->size - start) * sizeof(long); \
} while (0)
#define PUSHF() g(pushf)
#define POPF() g(popf)
#define SAHF g(sahf)
#define CLD g(cld)
#define STD g(std)
#define MUL18(val,z) MUL1(val,z)
#define MUL1(val,z) load(val, z); gz(mul, z)
#define IMUL1(val,z) load(val, z); gz(imul1, z)
#define DIV(val, z) load(val, z); gz(div, z)
#define IDIV(val, z) load(val, z); gz(idiv, z)
#define IMUL3(times, src, dst,z) load(src, z); op(imul, times, z); store(dst, z)
#define IMUL2(val, reg,z) IMUL3(val, reg, reg, z)
#define CVT ga(cvt, sz(oz))
#define CVTE ga(cvte, sz(oz))
#define ROL(count, val,z) los(rol, count, val, z)
#define ROR(count, val,z) los(ror, count, val, z)
#define RCL(count, val,z) los(rcl, count, val, z)
#define RCR(count, val,z) los(rcr, count, val, z)
#define SHL(count, val,z) los(shl, count, val, z)
#define SHR(count, val,z) los(shr, count, val, z)
#define SAR(count, val,z) los(sar, count, val, z)
#define SHLD(count, extra, dst,z) \
    load(dst,z); \
    if (arg_##count == arg_reg_c) op(shld_cl, extra,z); \
    else { op(shld_imm, extra,z); GEN(imm); } \
    store(dst,z)
#define SHRD(count, extra, dst,z) \
    load(dst,z); \
    if (arg_##count == arg_reg_c) op(shrd_cl, extra,z); \
    else { op(shrd_imm, extra,z); GEN(imm); } \
    store(dst,z)
#define BT(bit, val,z) lo(bt, val, bit, z)
#define BTC(bit, val,z) lo(btc, val, bit, z)
#define BTS(bit, val,z) lo(bts, val, bit, z)
#define BTR(bit, val,z) lo(btr, val, bit, z)
#define BSF(src, dst,z) los(bsf, src, dst, z)
#define BSR(src, dst,z) los(bsr, src, dst, z)
#define BSWAP(dst) ga(bswap, arg_##dst)
#define strop(op, rep, z) gag(op, sz(z) * size_count + rep_##rep, saved_ip)
#define STR(op, z) strop(op, once, z)
#define REP(op, z) strop(op, rep, z)
#define REPZ(op, z) strop(op, repz, z)
#define REPNZ(op, z) strop(op, repnz, z)
#define CMPXCHG(src, dst,z) load(src, z); op(cmpxchg, dst, z)
#define CMPXCHG8B(dst,z) g_addr(); gg(cmpxchg8b, saved_ip)
#define XADD(src, dst,z) XCHG(src, dst,z); ADD(src, dst,z)
void helper_rdtsc(struct cpu_state *cpu);
#define RDTSC h(helper_rdtsc)
#define CPUID() g(cpuid)
// atomic
#define atomic_op(type, src, dst,z) load(src, z); op(atomic_##type, dst, z)
#define ATOMIC_ADD(src, dst,z) atomic_op(add, src, dst, z)
#define ATOMIC_OR(src, dst,z) atomic_op(or, src, dst, z)
#define ATOMIC_ADC(src, dst,z) atomic_op(adc, src, dst, z)
#define ATOMIC_SBB(src, dst,z) atomic_op(sbb, src, dst, z)
#define ATOMIC_AND(src, dst,z) atomic_op(and, src, dst, z)
#define ATOMIC_SUB(src, dst,z) atomic_op(sub, src, dst, z)
#define ATOMIC_XOR(src, dst,z) atomic_op(xor, src, dst, z)
#define ATOMIC_INC(val,z) op(atomic_inc, val, z)
#define ATOMIC_DEC(val,z) op(atomic_dec, val, z)
#define ATOMIC_CMPXCHG(src, dst,z) atomic_op(cmpxchg, src, dst, z)
#define ATOMIC_XADD(src, dst,z) load(src, z); op(atomic_xadd, dst, z); store(src, z)
#define ATOMIC_BTC(bit, val,z) lo(atomic_btc, val, bit, z)
#define ATOMIC_BTS(bit, val,z) lo(atomic_bts, val, bit, z)
#define ATOMIC_BTR(bit, val,z) lo(atomic_btr, val, bit, z)
#define ATOMIC_CMPXCHG8B(dst,z) g_addr(); gg(atomic_cmpxchg8b, saved_ip)
// fpu
#define st_0 0
#define st_i modrm.rm_opcode
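
// FPU instructions are implemented as calls to the fpu_* C helpers declared in emu/fpu.h (via
// h/hh/hhh) rather than as inline gadget chains; memory operands go through h_read/h_write,
// which pair the address computation with a read or write helper of the given size. st_i is the
// FPU stack index encoded in the modrm byte.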
#define FLD() hh(fpu_ld, st_i)
#define FILD(val,z) h_read(fpu_ild, z)
#define FLDM(val,z) h_read(fpu_ldm, z)
#define FSTM(dst,z) h_write(fpu_stm, z)
#define FIST(dst,z) h_write(fpu_ist, z)
#define FXCH() hh(fpu_xch, st_i)
#define FCOM() hh(fpu_com, st_i)
#define FCOMM(val,z) h_read(fpu_comm, z)
#define FICOM(val,z) h_read(fpu_icom, z)
#define FUCOM() hh(fpu_ucom, st_i)
#define FUCOMI() hh(fpu_ucomi, st_i)
#define FCOMI() hh(fpu_comi, st_i)
#define FTST() h(fpu_tst)
#define FXAM() h(fpu_xam)
#define FST() hh(fpu_st, st_i)
#define FCHS() h(fpu_chs)
#define FABS() h(fpu_abs)
#define FLDC(what) hh(fpu_ldc, fconst_##what)
#define FPREM() h(fpu_prem)
#define FRNDINT() h(fpu_rndint)
#define FSCALE() h(fpu_scale)
#define FSQRT() h(fpu_sqrt)
#define FYL2X() h(fpu_yl2x)
#define F2XM1() h(fpu_2xm1)
#define FSTSW(dst) if (arg_##dst == arg_reg_a) g(fstsw_ax); else UNDEFINED
#define FSTCW(dst) if (arg_##dst == arg_reg_a) UNDEFINED; else h_write(fpu_stcw, 16)
#define FLDCW(dst) if (arg_##dst == arg_reg_a) UNDEFINED; else h_read(fpu_ldcw, 16)
#define FSTENV(val,z) h_write(fpu_stenv, z)
#define FLDENV(val,z) h_read(fpu_ldenv, z)
#define FSAVE(val,z) h_write(fpu_save, z)
#define FRESTORE(val,z) h_read(fpu_restore, z)
#define FPOP h(fpu_pop)
#define FINCSTP() h(fpu_incstp)
#define FADD(src, dst) hhh(fpu_add, src, dst)
#define FIADD(val,z) h_read(fpu_iadd, z)
#define FADDM(val,z) h_read(fpu_addm, z)
#define FSUB(src, dst) hhh(fpu_sub, src, dst)
#define FSUBM(val,z) h_read(fpu_subm, z)
#define FISUB(val,z) h_read(fpu_isub, z)
#define FISUBR(val,z) h_read(fpu_isubr, z)
#define FSUBR(src, dst) hhh(fpu_subr, src, dst)
#define FSUBRM(val,z) h_read(fpu_subrm, z)
#define FMUL(src, dst) hhh(fpu_mul, src, dst)
#define FIMUL(val,z) h_read(fpu_imul, z)
#define FMULM(val,z) h_read(fpu_mulm, z)
#define FDIV(src, dst) hhh(fpu_div, src, dst)
#define FIDIV(val,z) h_read(fpu_idiv, z)
#define FDIVM(val,z) h_read(fpu_divm, z)
#define FDIVR(src, dst) hhh(fpu_divr, src, dst)
#define FIDIVR(val,z) h_read(fpu_idivr, z)
#define FDIVRM(val,z) h_read(fpu_divrm, z)
#define FPATAN() h(fpu_patan)
#define FSIN() h(fpu_sin)
#define FCOS() h(fpu_cos)
// vector
// sync with VEC_ARG_LIST
enum vec_arg {
    vec_arg_xmm, vec_arg_reg, vec_arg_imm, vec_arg_count,
    vec_arg_mem,
};
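
// Collapse a decoder arg into the vec_arg category used to index the vector gadget tables,
// depending on whether the modrm byte names a register or memory.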
static inline enum vec_arg vecarg(enum arg arg, struct modrm *modrm) {
    switch (arg) {
        case arg_modrm_reg:
            return vec_arg_reg;
        case arg_imm:
            return vec_arg_imm;
        case arg_xmm_modrm_reg:
            return vec_arg_xmm;
        case arg_modrm_val:
            if (modrm->type == modrm_reg)
                return vec_arg_reg;
            return vec_arg_mem;
        case arg_xmm_modrm_val:
            if (modrm->type == modrm_reg)
                return vec_arg_xmm;
            return vec_arg_mem;
        default:
            die("unimplemented vecarg");
    }
}
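
// Emit a vector operation as a call to `helper`: choose a gadget based on the register/memory
// form of each operand, then pack the xmm/register byte offsets (and, for the immediate form,
// the immediate itself) into a single argument word.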
static inline bool gen_vec(enum arg rm, enum arg reg, void (*helper)(), gadget_t (*helper_gadgets_mem)[vec_arg_count], struct gen_state *state, struct modrm *modrm, uint8_t imm, dword_t saved_ip, bool seg_gs) {
    enum vec_arg v_reg = vecarg(reg, modrm);
    enum vec_arg v_rm = vecarg(rm, modrm);

    gadget_t gadget;
    if (v_rm == vec_arg_mem) {
        gadget = (*helper_gadgets_mem)[v_reg];
    } else {
        extern gadget_t vec_helper_reg_gadgets[vec_arg_count][vec_arg_count];
        gadget = vec_helper_reg_gadgets[v_reg][v_rm];
    }
    if (gadget == NULL) {
        UNDEFINED;
    }

    switch (v_rm) {
        case vec_arg_xmm:
            GEN(gadget);
            GEN(helper);
            GEN((modrm->opcode * sizeof(union xmm_reg))
                    | (modrm->rm_opcode * sizeof(union xmm_reg) << 8));
            break;
        case vec_arg_mem:
            gen_addr(state, modrm, seg_gs, saved_ip);
            GEN(gadget);
            GEN(saved_ip);
            GEN(helper);
            GEN(modrm->opcode * sizeof(union xmm_reg));
            break;
        case vec_arg_imm:
            // TODO: support immediates and opcode
            GEN(gadget);
            GEN(helper);
            GEN((modrm->rm_opcode * sizeof(union xmm_reg))
                    | (((uint16_t) imm) << 8));
            break;
        default: die("unimplemented vecarg");
    }
    return true;
}
#define _v(src, dst, helper, helper_gadgets, z) do { \
    extern gadget_t helper_gadgets[vec_arg_count]; \
    if (!gen_vec(src, dst, (void (*)()) helper, &helper_gadgets, state, &modrm, 0, saved_ip, seg_gs)) return false; \
} while (0)
#define _v_imm(imm, dst, helper, helper_gadgets, z) do { \
    extern gadget_t helper_gadgets[vec_arg_count]; \
    if (!gen_vec(arg_imm, dst, (void (*)()) helper, &helper_gadgets, state, &modrm, imm, saved_ip, seg_gs)) return false; \
} while (0)
#define v(op, src, dst,z) _v(arg_##src, arg_##dst, vec_##op##z, vec_helper_load##z##_gadgets, z)
#define v_imm(op, imm, dst,z) _v_imm(imm, arg_##dst, vec_##op##z, vec_helper_load##z##_gadgets, z)
#define v_write(op, src, dst,z) _v(arg_##dst, arg_##src, vec_##op##z, vec_helper_store##z##_gadgets, z)
#define VLOAD(src, dst,z) v(load, src, dst,z)
#define VZLOAD(src, dst,z) v(zload, src, dst, z)
#define VLOAD_PADNOTMEM(src, dst, z) do { \
    if (arg_##src == arg_xmm_modrm_val && modrm.type != modrm_mem) { \
        VZLOAD(src, dst, z); \
    } else { \
        VLOAD(src, dst, z); \
    } \
} while (0)
#define VLOAD_PADMEM(src, dst, z) do { \
    if (arg_##src == arg_xmm_modrm_val && modrm.type != modrm_reg) { \
        VZLOAD(src, dst, z); \
    } else { \
        VLOAD(src, dst, z); \
    } \
} while (0)
#define VSTORE(src, dst,z) v_write(store, src, dst,z)
#define VCOMPARE(src, dst,z) v(compare, src, dst,z)
#define VSHIFTR_IMM(reg, amount, z) v_imm(imm_shiftr, amount, reg,z)
#define VXOR(src, dst,z) v(xor, src, dst,z)
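
// emu/decode.h contains the actual instruction decoder; including it twice with OP_SIZE set to
// 32 and then 16 instantiates a 32-bit and a 16-bit variant of gen_step, each built on the
// macros defined above.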
#define DECODER_RET int
#define DECODER_NAME gen_step
#define DECODER_ARGS struct gen_state *state, struct tlb *tlb
#define DECODER_PASS_ARGS state, tlb
#define OP_SIZE 32
#include "emu/decode.h"
#undef OP_SIZE
#define OP_SIZE 16
#include "emu/decode.h"
#undef OP_SIZE