/*
 * ish/jit/gen.c
 * 2018-10-30 14:41:39 -07:00
 *
 * 395 lines
 * 14 KiB
 * C
 */

#include <assert.h>
#include "jit/gen.h"
#include "emu/modrm.h"
#include "emu/cpuid.h"
#include "emu/fpu.h"
#include "emu/interrupt.h"
/*
 * Append one word to the code of the block under construction.
 *
 * When the code array is already full its capacity is doubled and the block
 * is reallocated (block header followed by `capacity` words) before the word
 * is stored. Failing to grow is fatal: a message is printed and the process
 * aborts.
 *
 * state: generator state owning the block; state->block may be replaced
 * thing: the word to append (a gadget address or one of its operands)
 */
static void gen(struct gen_state *state, unsigned long thing) {
    assert(state->size <= state->capacity);

    if (state->size >= state->capacity) {
        state->capacity *= 2;
        size_t grown_bytes = sizeof(struct jit_block) + state->capacity * sizeof(unsigned long);
        struct jit_block *grown = realloc(state->block, grown_bytes);
        if (grown == NULL) {
            println("out of memory while jitting");
            abort();
        }
        // realloc may have moved the block, so always go through state->block
        state->block = grown;
    }

    assert(state->size < state->capacity);
    state->block->code[state->size] = thing;
    state->size++;
}
/*
 * Begin generating a new block for the guest code starting at addr.
 *
 * Resets the generator state (empty code, initial capacity, no recorded jump
 * slots, ip at addr) and allocates a fresh block with room for
 * JIT_BLOCK_INITIAL_CAPACITY code words.
 *
 * Running out of memory is fatal, exactly as it is when gen() has to grow
 * the block: a message is printed and the process aborts.
 *
 * addr:  guest address the block starts at
 * state: generator state to initialize; state->block receives the new block
 */
void gen_start(addr_t addr, struct gen_state *state) {
    state->capacity = JIT_BLOCK_INITIAL_CAPACITY;
    state->size = 0;
    state->ip = addr;
    for (int i = 0; i <= 1; i++) {
        // 0 means "no jump operand recorded" (see gen_end)
        state->jump_ip[i] = 0;
    }

    struct jit_block *block = malloc(sizeof(struct jit_block) + state->capacity * sizeof(unsigned long));
    if (block == NULL) {
        // previously fell through and dereferenced NULL just below
        println("out of memory while jitting");
        abort();
    }
    state->block = block;
    block->addr = addr;
}
/*
 * Finish the block held in state once all of its code has been generated.
 *
 * - Each jump slot recorded during generation (a nonzero index into the code
 *   array) becomes a pointer to that code word, and the word's current value
 *   is saved in old_jump_ip. Slots that were never recorded become NULL.
 *   This is done here, at the end, because gen() may reallocate the block
 *   while code is still being appended.
 * - end_addr is set to the last guest address consumed (state->ip - 1), or to
 *   the start address if state->ip never moved.
 * - All list heads embedded in the block are initialized.
 */
void gen_end(struct gen_state *state) {
    struct jit_block *block = state->block;

    for (int slot = 0; slot < 2; slot++) {
        if (state->jump_ip[slot] == 0) {
            block->jump_ip[slot] = NULL;
        } else {
            block->jump_ip[slot] = &block->code[state->jump_ip[slot]];
            block->old_jump_ip[slot] = *block->jump_ip[slot];
        }
        list_init(&block->jumps_from[slot]);
        list_init(&block->jumps_from_links[slot]);
    }

    if (block->addr == state->ip) {
        block->end_addr = block->addr;
    } else {
        block->end_addr = state->ip - 1;
    }

    list_init(&block->chain);
    for (int slot = 0; slot < 2; slot++) {
        list_init(&block->page[slot]);
    }
}
/*
 * Append an exit sequence to the block: the exit gadget followed by the
 * current guest ip as its operand.
 *
 * Used in case the last instruction generated didn't end the block itself.
 */
void gen_exit(struct gen_state *state) {
    extern void gadget_exit();

    unsigned long exit_gadget = (unsigned long) gadget_exit;
    gen(state, exit_gadget);

    unsigned long resume_ip = state->ip;
    gen(state, resume_ip);
}
// Hooks for the instruction decoder. They all expect `state` and `tlb` to be
// in scope (see DECODER_ARGS at the bottom of this file).
// NOTE(review): presumably expanded by emu/decode.h -- confirm there.

// Locals shared by the macros in this file. saved_ip remembers state->ip as
// it was before any instruction bytes were consumed; addr_offset, end_block
// and seg_gs are filled in by the macros below.
#define DECLARE_LOCALS \
dword_t saved_ip = state->ip; \
dword_t addr_offset = 0; \
bool end_block = false; \
bool seg_gs = false
// Return value of the decoder: false once a macro has set end_block.
#define FINISH \
return !end_block
// Trace the current pid and the guest ip being decoded. Note that the
// expansion already ends in a semicolon.
#define TRACEIP() TRACE("%d %08x\t", current->pid, state->ip);
// Read a size-bit value at state->ip into name through the tlb, then step
// state->ip past it. A failed read generates a GPF interrupt (SEGFAULT) and
// returns false from the enclosing function.
#define _READIMM(name, size) \
if (!tlb_read(tlb, state->ip, &name, size/8)) SEGFAULT; \
state->ip += size/8
// Decode a ModR/M operand into the local `modrm`. The decoder is handed
// &state->ip; a failed decode is treated like a failed read above.
#define READMODRM if (!modrm_decode32(&state->ip, tlb, &modrm)) SEGFAULT
// Read a 32-bit value into addr_offset (gen_op uses it for arg_mem_addr).
#define READADDR _READIMM(addr_offset, 32)
// Note a gs prefix; gen_addr emits the seg_gs gadget when the flag is set.
#define SEG_GS() seg_gs = true
// Operand kinds. The entries before arg_count index a gadget table (gen_op
// looks a gadget up as gadgets[arg]).
// This should stay in sync with the definition of .gadget_array in gadgets.h
enum arg {
    // registers; gen_op adds a modrm register number to arg_reg_a
    arg_reg_a,
    arg_reg_c,
    arg_reg_d,
    arg_reg_b,
    arg_reg_sp,
    arg_reg_bp,
    arg_reg_si,
    arg_reg_di,
    // gen_op appends the immediate after the gadget
    arg_imm,
    // gen_op generates the address first and appends saved_ip after the gadget
    arg_mem,
    // gen_op generates the address first
    arg_addr,
    arg_gs,
    // number of kinds above; gen_op rejects anything >= arg_count
    arg_count,
    arg_invalid,
    // the following should not be synced with the list mentioned above (no gadgets implement them)
    // gen_op rewrites each of them to one of the kinds above
    arg_modrm_val,
    arg_modrm_reg,
    arg_mem_addr,
    arg_1,
};
// Operand size indices. sz() maps a width in bits to one of the first three.
enum size {
    size_8,
    size_16,
    size_32,
    // number of sizes above
    size_count,
    // FIXME bonus sizes, fpu only at the moment
    size_64,
    size_80,
};
// Condition codes, used as indices into the per-condition gadget tables
// (jmp, set, setn, skip, skipn).
// sync with COND_LIST in control.S
enum cond {
    cond_O,
    cond_B,
    cond_E,
    cond_BE,
    cond_S,
    cond_P,
    cond_L,
    cond_LE,
    // number of conditions above
    cond_count,
};
// Repeat modes for string operations (see strop).
enum repeat {
    rep_once,
    rep_repz,
    rep_repnz,
    // number of distinct modes above
    rep_count,
    // plain "rep" shares the slot of "repz"
    rep_rep = rep_repz,
};
// Gadgets are referred to by function pointer; their addresses are what gets
// written into a block's code.
typedef void (*gadget_t)();
// Append one word (a gadget address or an operand) to the block in `state`.
#define GEN(thing) gen(state, (unsigned long) (thing))
// g / gg / ggg / gggg: emit the gadget named gadget_<g>, followed by zero to
// three operand words.
#define g(g) do { extern void gadget_##g(); GEN(gadget_##g); } while (0)
#define gg(_g, a) do { g(_g); GEN(a); } while (0)
#define ggg(_g, a, b) do { g(_g); GEN(a); GEN(b); } while (0)
#define gggg(_g, a, b, c) do { g(_g); GEN(a); GEN(b); GEN(c); } while (0)
// ga / gag / gagg: emit entry i of the table <g>_gadgets, followed by zero to
// two operand words. A NULL entry generates an undefined-instruction
// interrupt instead and returns false from the enclosing function.
#define ga(g, i) do { extern gadget_t g##_gadgets[]; if (g##_gadgets[i] == NULL) UNDEFINED; GEN(g##_gadgets[i]); } while (0)
#define gag(g, i, a) do { ga(g, i); GEN(a); } while (0)
#define gagg(g, i, a, b) do { ga(g, i); GEN(a); GEN(b); } while (0)
// ga indexed by operand size, where z is a width in bits (see sz()).
#define gz(g, z) ga(g, sz(z))
// h / hh / hhh: emit the helper_0 / helper_1 / helper_2 gadget followed by
// the helper h and its zero to two arguments.
#define h(h) gg(helper_0, h)
#define hh(h, a) ggg(helper_1, h, a)
#define hhh(h, a, b) gggg(helper_2, h, a, b)
// Generate the address of the local `modrm`, then the helper_read<z> /
// helper_write<z> gadget with the helper h<z> and saved_ip as operands.
#define h_read(h, z) do { g_addr(); gg_here(helper_read##z, h##z); } while (0)
#define h_write(h, z) do { g_addr(); gg_here(helper_write##z, h##z); } while (0)
// gg with saved_ip appended as a second operand.
#define gg_here(g, a) ggg(g, a, saved_ip)
// Generate an interrupt (undefined instruction / general protection fault)
// carrying saved_ip, then return false from the enclosing function. Both
// need `state` and `saved_ip` in scope.
#define UNDEFINED do { gg_here(interrupt, INT_UNDEFINED); return false; } while (0)
#define SEGFAULT do { gg_here(interrupt, INT_GPF); return false; } while (0)
/*
 * Map an operand width in bits to its enum size index.
 *
 * Returns size_8, size_16 or size_32 for 8, 16 or 32, and -1 for every other
 * width.
 */
static inline int sz(int size) {
    if (size == 8)
        return size_8;
    if (size == 16)
        return size_16;
    if (size == 32)
        return size_32;
    return -1;
}
/*
 * Generate the gadgets that compute the address described by modrm.
 *
 * Emits, in order:
 *   - the base: addr_none when there is no base register, otherwise the
 *     addr gadget for that register; either way followed by modrm->offset
 *   - for modrm_mem_si operands, the si gadget selected by index and shift
 *   - the seg_gs gadget when seg_gs is set
 *
 * saved_ip is only used when a gadget table entry is missing: an
 * undefined-instruction interrupt is generated and false is returned.
 * Otherwise returns true.
 */
bool gen_addr(struct gen_state *state, struct modrm *modrm, bool seg_gs, dword_t saved_ip) {
    if (modrm->base != reg_none) {
        gag(addr, modrm->base, modrm->offset);
    } else {
        gg(addr_none, modrm->offset);
    }

    if (modrm->type == modrm_mem_si) {
        ga(si, modrm->index * 4 + modrm->shift);
    }

    if (seg_gs) {
        g(seg_gs);
    }

    return true;
}
// gen_addr applied to the decoder's locals
#define g_addr() gen_addr(state, &modrm, seg_gs, saved_ip)
// Generate the gadget that applies one operation to one operand.
//
// this really wants to use all the locals of the decoder, which we can do
// really nicely in gcc using nested functions, but that won't work in clang,
// so we explicitly pass 500 arguments. sorry for the mess
//
// state:       generator state the code is appended to
// gadgets:     the operation's gadget table, arg_count entries per size
// arg:         operand kind; arg_modrm_reg, arg_modrm_val, arg_mem_addr and
//              arg_1 are first rewritten to a kind that has a table entry
// modrm:       decoded ModR/M operand; overwritten for arg_mem_addr
// imm:         immediate value; overwritten with 1 for arg_1
// size:        operand width in bits (8, 16 or 32)
// saved_ip:    ip of the instruction, appended after memory gadgets and used
//              for generated interrupts
// seg_gs:      whether the address needs the gs segment applied
// addr_offset: absolute address used for arg_mem_addr
//
// Returns true on success. If the width, the operand kind or the gadget is
// not supported, an undefined-instruction interrupt is generated and false
// is returned.
static inline bool gen_op(struct gen_state *state, gadget_t *gadgets, enum arg arg, struct modrm *modrm, uint64_t *imm, int size, dword_t saved_ip, bool seg_gs, dword_t addr_offset) {
    size = sz(size);
    if (size < 0) {
        // sz() reports unsupported widths as -1; using that as a table index
        // would form a pointer before the start of the gadget table
        debugger;
        UNDEFINED;
    }
    gadgets = gadgets + size * arg_count;

    switch (arg) {
        case arg_modrm_reg:
            // TODO find some way to assert that this won't overflow?
            arg = modrm->reg + arg_reg_a; break;
        case arg_modrm_val:
            if (modrm->type == modrm_reg)
                arg = modrm->base + arg_reg_a;
            else
                arg = arg_mem;
            break;
        case arg_mem_addr:
            // absolute address: rewrite modrm to a base-less memory operand
            arg = arg_mem;
            modrm->type = modrm_mem;
            modrm->base = reg_none;
            modrm->offset = addr_offset;
            break;
        case arg_1:
            arg = arg_imm;
            *imm = 1;
            break;
        default:
            // already a concrete operand kind
            break;
    }
    if (arg >= arg_count || gadgets[arg] == NULL) {
        debugger;
        UNDEFINED;
    }

    // memory operands need their address generated before the gadget itself
    if (arg == arg_mem || arg == arg_addr) {
        if (!gen_addr(state, modrm, seg_gs, saved_ip))
            return false;
    }
    GEN(gadgets[arg]);
    // trailing operand word: the immediate, or the ip of the instruction for
    // memory accesses
    if (arg == arg_imm)
        GEN(*imm);
    else if (arg == arg_mem)
        GEN(saved_ip);
    return true;
}
// Apply the operation `type` to operand `thing` of width z bits, using the
// table <type>_gadgets and the decoder's locals (modrm, imm, saved_ip,
// seg_gs, addr_offset). Returns false from the enclosing function when
// gen_op fails.
#define op(type, thing, z) do { \
extern gadget_t type##_gadgets[]; \
if (!gen_op(state, type##_gadgets, arg_##thing, &modrm, &imm, z, saved_ip, seg_gs, addr_offset)) return false; \
} while (0)
#define load(thing, z) op(load, thing, z)
#define store(thing, z) op(store, thing, z)
// los: load dst, apply o with src, store the result back to dst.
// lo: the same without the store.
// NOTE: these and most macros below expand to several statements without a
// do/while wrapper, so they must not be the body of an unbraced if/else/loop.
#define los(o, src, dst, z) load(dst, z); op(o, src, z); store(dst, z)
#define lo(o, src, dst, z) load(dst, z); op(o, src, z)
// moves; the extending forms load at the source width zs, extend, and store
// at the destination width zd
#define MOV(src, dst,z) load(src, z); store(dst, z)
#define MOVZX(src, dst,zs,zd) load(src, zs); gz(zero_extend, zs); store(dst, zd)
#define MOVSX(src, dst,zs,zd) load(src, zs); gz(sign_extend, zs); store(dst, zd)
// two-operand arithmetic and logic
#define XCHG(src, dst,z) los(xchg, src, dst, z)
#define ADD(src, dst,z) los(add, src, dst, z)
#define OR(src, dst,z) los(or, src, dst, z)
#define ADC(src, dst,z) los(adc, src, dst, z)
#define SBB(src, dst,z) los(sbb, src, dst, z)
#define AND(src, dst,z) los(and, src, dst, z)
#define SUB(src, dst,z) los(sub, src, dst, z)
#define XOR(src, dst,z) los(xor, src, dst, z)
// CMP and TEST are SUB and AND without storing the result
#define CMP(src, dst,z) lo(sub, src, dst, z)
#define TEST(src, dst,z) lo(and, src, dst, z)
// one-operand forms: load, apply the size-indexed gadget, store back
#define NOT(val,z) load(val,z); gz(not, z); store(val,z)
#define NEG(val,z) load(val,z); gz(neg, z); store(val,z)
// the push and pop gadgets take saved_ip as their operand
#define POP(thing,z) gg(pop, saved_ip); store(thing, z)
#define PUSH(thing,z) load(thing, z); gg(push, saved_ip)
#define INC(val,z) load(val, z); gz(inc, z); store(val, z)
#define DEC(val,z) load(val, z); gz(dec, z); store(val, z)
// state->ip with bit 63 set.
// NOTE(review): 1ul << 63 needs a 64-bit unsigned long. Presumably the high
// bit tags the word as a guest address rather than a pointer to generated
// code -- confirm against the code that patches jumps.
#define fake_ip (state->ip | (1ul << 63))
// Record which code words hold jump targets, as offsets from the current end
// of the code (state->size), so -1 is the word emitted last. gen_end turns
// these indices into pointers. An off2 of 0 means there is no second target.
#define jump_ips(off1, off2) \
state->jump_ip[0] = state->size + off1; \
if (off2 != 0) \
state->jump_ip[1] = state->size + off2
// Jumps and calls. Every macro here that sets end_block ends the block.
// indirect jump: load the target, then jmp_indir
#define JMP(loc) load(loc, OP_SIZE); g(jmp_indir); end_block = true
// relative jump: one target operand, recorded as jump slot 0
#define JMP_REL(off) gg(jmp, fake_ip + off); jump_ips(-1, 0); end_block = true
// two operands (fake_ip + off, then fake_ip), recorded as jump slots 0 and 1
#define JCXZ_REL(off) ggg(jcxz, fake_ip + off, fake_ip); jump_ips(-2, -1); end_block = true
// conditional jump: the jmp gadget for condition cc with operands `to` and
// `else`, recorded as jump slots 0 and 1
#define jcc(cc, to, else) gagg(jmp, cond_##cc, to, else); jump_ips(-2, -1); end_block = true
#define J_REL(cc, off) jcc(cc, fake_ip + off, fake_ip)
// same as J_REL with the two operands swapped
#define JN_REL(cc, off) jcc(cc, fake_ip, fake_ip + off)
#define CALL(loc) load(loc, OP_SIZE); ggg(call_indir, saved_ip, fake_ip); end_block = true
// operands: saved_ip, fake_ip + off, fake_ip; only the second is recorded
// (as jump slot 0)
#define CALL_REL(off) gggg(call, saved_ip, fake_ip + off, fake_ip); jump_ips(-2, 0); end_block = true
// the second operand is 4 + imm
#define RET_NEAR(imm) ggg(ret, saved_ip, 4 + imm); end_block = true
// unlike UNDEFINED and SEGFAULT this passes state->ip, not saved_ip
#define INT(code) ggg(interrupt, (uint8_t) code, state->ip); end_block = true
// emit the set / setn gadget for condition cc, then store to an 8-bit dst
#define SET(cc, dst) ga(set, cond_##cc); store(dst, 8)
#define SETN(cc, dst) ga(setn, cond_##cc); store(dst, 8)
// wins the prize for the most annoying instruction to generate
//
// Emits a skip gadget with a placeholder operand of 0, then the load/store
// pair for the move, and finally patches the placeholder (the word just
// before `start`) with the size in bytes of the code emitted for the move.
// CMOV uses the skipn table and CMOVN the skip table.
// NOTE(review): presumably the gadget steps over that many bytes when the
// move must not happen -- confirm in the gadget source.
#define CMOV(cc, src, dst,z) do { \
gag(skipn, cond_##cc, 0); \
int start = state->size; \
load(src, z); store(dst, z); \
state->block->code[start - 1] = (state->size - start) * sizeof(long); \
} while (0)
#define CMOVN(cc, src, dst,z) do { \
gag(skip, cond_##cc, 0); \
int start = state->size; \
load(src, z); store(dst, z); \
state->block->code[start - 1] = (state->size - start) * sizeof(long); \
} while (0)
// flag instructions: a single gadget each, no operands
#define PUSHF() g(pushf)
#define POPF() g(popf)
#define SAHF g(sahf)
#define CLD g(cld)
#define STD g(std)
// multiply and divide: load the operand, then the size-indexed gadget
#define MUL18(val,z) MUL1(val,z)
#define MUL1(val,z) load(val, z); gz(mul, z)
#define IMUL1(val,z) load(val, z); gz(imul1, z)
#define DIV(val, z) load(val, z); gz(div, z)
#define IDIV(val, z) load(val, z); gz(idiv, z)
// load src, apply the imul operation with `times`, store to dst
#define IMUL3(times, src, dst,z) load(src, z); op(imul, times, z); store(dst, z)
#define IMUL2(val, reg,z) IMUL3(val, reg, reg, z)
// `oz` is not defined in this file.
// NOTE(review): presumably the operand size supplied by the decoder -- confirm.
#define CVT ga(cvt, sz(oz))
#define CVTE ga(cvte, sz(oz))
// rotates and shifts: load val, apply the operation with count, store back
#define ROL(count, val,z) los(rol, count, val, z)
#define ROR(count, val,z) los(ror, count, val, z)
#define SHL(count, val,z) los(shl, count, val, z)
#define SHR(count, val,z) los(shr, count, val, z)
#define SAR(count, val,z) los(sar, count, val, z)
// Double shifts. A count of reg_c selects the _cl operation; any other count
// selects the _imm operation with the decoder's imm appended as an operand.
#define SHLD(count, extra, dst,z) \
load(dst,z); \
if (arg_##count == arg_reg_c) op(shld_cl, extra,z); \
else { op(shld_imm, extra,z); GEN(imm); } \
store(dst,z)
#define SHRD(count, extra, dst,z) \
load(dst,z); \
if (arg_##count == arg_reg_c) op(shrd_cl, extra,z); \
else { op(shrd_imm, extra,z); GEN(imm); } \
store(dst,z)
// bit test: loads `bit`, then applies bt with `val`; nothing is stored
#define BT(bit, val,z) lo(bt, val, bit, z)
// not implemented: these generate an undefined-instruction interrupt
#define BTC(bit, val,z) UNDEFINED
#define BTS(bit, val,z) UNDEFINED
#define BTR(bit, val,z) UNDEFINED
#define BSF(src, dst,z) los(bsf, src, dst, z)
#define BSR(src, dst,z) los(bsr, src, dst, z)
// indexed by the operand kind of dst
#define BSWAP(dst) ga(bswap, arg_##dst)
// String operations: the table <op>_gadgets is indexed by size and repeat
// mode, with saved_ip as the operand.
// NOTE(review): the stride used is size_count, not rep_count. Both are 3, so
// the index is the same either way -- confirm which one the table layout
// actually means.
#define strop(op, rep, z) gag(op, sz(z) * size_count + rep_##rep, saved_ip)
#define STR(op, z) strop(op, once, z)
#define REP(op, z) strop(op, rep, z)
#define REPZ(op, z) strop(op, repz, z)
#define REPNZ(op, z) strop(op, repnz, z)
// load src, then apply the cmpxchg operation to dst
#define CMPXCHG(src, dst,z) load(src, z); op(cmpxchg, dst, z)
// built from an exchange followed by an add
#define XADD(src, dst,z) XCHG(src, dst,z); ADD(src, dst,z)
#define RDTSC g(rdtsc)
#define CPUID() g(cpuid)
// atomic
// Each of these loads src and then applies the atomic_<type> operation to
// dst. Unlike the los() forms there is no separate store of dst afterwards.
#define atomic_op(type, src, dst,z) load(src, z); op(atomic_##type, dst, z)
#define ATOMIC_ADD(src, dst,z) atomic_op(add, src, dst, z)
#define ATOMIC_OR(src, dst,z) atomic_op(or, src, dst, z)
#define ATOMIC_ADC(src, dst,z) atomic_op(adc, src, dst, z)
#define ATOMIC_SBB(src, dst,z) atomic_op(sbb, src, dst, z)
#define ATOMIC_AND(src, dst,z) atomic_op(and, src, dst, z)
#define ATOMIC_SUB(src, dst,z) atomic_op(sub, src, dst, z)
#define ATOMIC_XOR(src, dst,z) atomic_op(xor, src, dst, z)
// single operand: nothing is loaded first
#define ATOMIC_INC(val,z) op(atomic_inc, val, z)
#define ATOMIC_DEC(val,z) op(atomic_dec, val, z)
#define ATOMIC_CMPXCHG(src, dst,z) atomic_op(cmpxchg, src, dst, z)
// also stores back to src after the operation
#define ATOMIC_XADD(src, dst,z) load(src, z); op(atomic_xadd, dst, z); store(src, z)
// sse
// None of these are implemented here: each one generates an
// undefined-instruction interrupt and returns false from the enclosing
// function.
#define XORP(src, dst) UNDEFINED
#define PSRLQ(src, dst) UNDEFINED
#define PCMPEQD(src, dst) UNDEFINED
#define PADD(src, dst) UNDEFINED
#define PSUB(src, dst) UNDEFINED
#define MOVQ(src, dst) UNDEFINED
#define MOVD(src, dst) UNDEFINED
#define CVTTSD2SI(src, dst) UNDEFINED
// fpu
// These are generated as helper calls (h/hh/hhh) or, for memory operands, as
// h_read/h_write, which generate the modrm address first and select the
// helper named <helper><z>.
// stack slot operands: st_0 is the constant 0, st_i comes from the modrm byte
#define st_0 0
#define st_i modrm.rm_opcode
// note: unlike its neighbours, this expansion already ends in a semicolon
#define FLD() hh(fpu_ld, st_i);
#define FILD(val,z) h_read(fpu_ild, z)
#define FLDM(val,z) h_read(fpu_ldm, z)
#define FSTM(dst,z) h_write(fpu_stm, z)
#define FIST(dst,z) h_write(fpu_ist, z)
#define FXCH() hh(fpu_xch, st_i)
#define FUCOM() hh(fpu_ucom, st_i)
// not implemented: generates an undefined-instruction interrupt
#define FUCOMI() UNDEFINED
#define FST() hh(fpu_st, st_i)
#define FCHS() h(fpu_chs)
#define FABS() h(fpu_abs)
// passes the constant named fconst_<what> to the helper
#define FLDC(what) hh(fpu_ldc, fconst_##what)
#define FPREM() h(fpu_prem)
#define FRNDINT() h(fpu_rndint)
#define FYL2X() h(fpu_yl2x)
// FSTSW is only supported with reg_a as the destination; FSTCW and FLDCW
// only with a destination other than reg_a. These expand to a bare if/else,
// so take care when using them inside another unbraced if.
#define FSTSW(dst) if (arg_##dst == arg_reg_a) g(fstsw_ax); else UNDEFINED
#define FSTCW(dst) if (arg_##dst == arg_reg_a) UNDEFINED; else h_write(fpu_stcw, 16)
#define FLDCW(dst) if (arg_##dst == arg_reg_a) UNDEFINED; else h_read(fpu_ldcw, 16)
#define FPOP h(fpu_pop)
// arithmetic: the (src, dst) forms pass both arguments to the helper; the
// (val, z) forms go through h_read and ignore val
#define FADD(src, dst) hhh(fpu_add, src, dst)
#define FIADD(val,z) h_read(fpu_iadd, z)
#define FADDM(val,z) h_read(fpu_addm, z)
#define FSUB(src, dst) hhh(fpu_sub, src, dst)
#define FSUBM(val,z) h_read(fpu_subm, z)
#define FISUB(val,z) h_read(fpu_isub, z)
#define FISUBR(val,z) h_read(fpu_isubr, z)
#define FSUBR(src, dst) hhh(fpu_subr, src, dst)
#define FSUBRM(val,z) h_read(fpu_subrm, z)
#define FMUL(src, dst) hhh(fpu_mul, src, dst)
#define FIMUL(val,z) h_read(fpu_imul, z)
#define FMULM(val,z) h_read(fpu_mulm, z)
#define FDIV(src, dst) hhh(fpu_div, src, dst)
#define FIDIV(val,z) h_read(fpu_idiv, z)
#define FIDIVR(val,z) h_read(fpu_idivr, z)
#define FDIVM(val,z) h_read(fpu_divm, z)
#define FDIVRM(val,z) h_read(fpu_divrm, z)
// Configure emu/decode.h and include it once per operand size (32, then 16).
// NOTE(review): presumably decode.h uses DECODER_RET / DECODER_NAME /
// DECODER_ARGS to define the decoder function (gen_step here) and derives a
// distinct name for each OP_SIZE -- confirm in decode.h.
#define DECODER_RET int
#define DECODER_NAME gen_step
#define DECODER_ARGS struct gen_state *state, struct tlb *tlb
#define DECODER_PASS_ARGS state, tlb
#define OP_SIZE 32
#include "emu/decode.h"
#undef OP_SIZE
#define OP_SIZE 16
#include "emu/decode.h"
#undef OP_SIZE