#include <assert.h>
#include "jit/gen.h"
#include "emu/modrm.h"
#include "emu/cpuid.h"
#include "emu/fpu.h"
#include "emu/sse.h"
#include "emu/interrupt.h"

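// Append one word to the block's code array, doubling the capacity when it
// runs out. Every emitter below ends up here, via the GEN/g/gg/ga macro
// family.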
static void gen(struct gen_state *state, unsigned long thing) {
    assert(state->size <= state->capacity);
    if (state->size >= state->capacity) {
        state->capacity *= 2;
        struct jit_block *bigger_block = realloc(state->block,
                sizeof(struct jit_block) + state->capacity * sizeof(unsigned long));
        if (bigger_block == NULL) {
            die("out of memory while jitting");
        }
        state->block = bigger_block;
    }
    assert(state->size < state->capacity);
    state->block->code[state->size++] = thing;
}

void gen_start(addr_t addr, struct gen_state *state) {
    state->capacity = JIT_BLOCK_INITIAL_CAPACITY;
    state->size = 0;
    state->ip = addr;
    for (int i = 0; i <= 1; i++) {
        state->jump_ip[i] = 0;
    }
    state->block_patch_ip = 0;

    struct jit_block *block = malloc(sizeof(struct jit_block) + state->capacity * sizeof(unsigned long));
    state->block = block;
    block->addr = addr;
}

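// Finish the block: turn the recorded jump_ip indices into pointers into the
// code array (keeping the original target words in old_jump_ip), patch the
// block's own address into the slot reserved by CALL/CALL_REL, and record the
// address range the block covers.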
void gen_end(struct gen_state *state) {
    struct jit_block *block = state->block;
    for (int i = 0; i <= 1; i++) {
        if (state->jump_ip[i] != 0) {
            block->jump_ip[i] = &block->code[state->jump_ip[i]];
            block->old_jump_ip[i] = *block->jump_ip[i];
        } else {
            block->jump_ip[i] = NULL;
        }

        list_init(&block->jumps_from[i]);
        list_init(&block->jumps_from_links[i]);
    }
    if (state->block_patch_ip != 0) {
        block->code[state->block_patch_ip] = (unsigned long) block;
    }
    if (block->addr != state->ip)
        block->end_addr = state->ip - 1;
    else
        block->end_addr = block->addr;
    list_init(&block->chain);
    block->is_jetsam = false;
    for (int i = 0; i <= 1; i++) {
        list_init(&block->page[i]);
    }
}

void gen_exit(struct gen_state *state) {
    extern void gadget_exit(void);
    // in case the last instruction didn't end the block
    gen(state, (unsigned long) gadget_exit);
    gen(state, state->ip);
}

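// Everything below is the vocabulary consumed by the decoder in emu/decode.h
// (included twice at the bottom of this file): the locals it expects, the
// READ* macros for fetching operands, and one macro per emulated instruction
// that emits the corresponding gadget sequence.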
#define DECLARE_LOCALS \
    dword_t saved_ip = state->ip; \
    dword_t addr_offset = 0; \
    bool end_block = false; \
    bool seg_gs = false

#define FINISH \
    return !end_block

#define RESTORE_IP state->ip = saved_ip
#define _READIMM(name, size) \
    if (!tlb_read(tlb, state->ip, &name, size/8)) SEGFAULT; \
    state->ip += size/8

#define READMODRM if (!modrm_decode32(&state->ip, tlb, &modrm)) SEGFAULT
#define READADDR _READIMM(addr_offset, 32)
#define SEG_GS() seg_gs = true

// This should stay in sync with the definition of .gadget_array in gadgets.h
enum arg {
    arg_reg_a, arg_reg_c, arg_reg_d, arg_reg_b, arg_reg_sp, arg_reg_bp, arg_reg_si, arg_reg_di,
    arg_imm, arg_mem, arg_addr, arg_gs,
    arg_count, arg_invalid,
    // the following should not be synced with the list mentioned above (no gadgets implement them)
    arg_modrm_val, arg_modrm_reg,
    arg_xmm_modrm_val, arg_xmm_modrm_reg,
    arg_mem_addr, arg_1,
};

enum size {
    size_8, size_16, size_32,
    size_count,
    size_64, size_80, size_128, // bonus sizes
};

// sync with COND_LIST in control.S
enum cond {
    cond_O, cond_B, cond_E, cond_BE, cond_S, cond_P, cond_L, cond_LE,
    cond_count,
};

enum repeat {
    rep_once, rep_repz, rep_repnz,
    rep_count,
    rep_rep = rep_repz,
};

typedef void (*gadget_t)(void);

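// Emitter shorthand: the g* macros emit one named gadget followed by that
// many literal argument words; ga/gag/gagg pick a gadget out of a per-family
// array (indexed by size, condition code, or register); h/hh/hhh emit a
// helper_N gadget that calls back into a C function. For example,
// gg(push, saved_ip) emits two words: the address of gadget_push, then
// saved_ip as its argument.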
#define GEN(thing) gen(state, (unsigned long) (thing))
#define g(g) do { extern void gadget_##g(void); GEN(gadget_##g); } while (0)
#define gg(_g, a) do { g(_g); GEN(a); } while (0)
#define ggg(_g, a, b) do { g(_g); GEN(a); GEN(b); } while (0)
#define gggg(_g, a, b, c) do { g(_g); GEN(a); GEN(b); GEN(c); } while (0)
#define ggggg(_g, a, b, c, d) do { g(_g); GEN(a); GEN(b); GEN(c); GEN(d); } while (0)
#define gggggg(_g, a, b, c, d, e) do { g(_g); GEN(a); GEN(b); GEN(c); GEN(d); GEN(e); } while (0)
#define ga(g, i) do { extern gadget_t g##_gadgets[]; if (g##_gadgets[i] == NULL) UNDEFINED; GEN(g##_gadgets[i]); } while (0)
#define gag(g, i, a) do { ga(g, i); GEN(a); } while (0)
#define gagg(g, i, a, b) do { ga(g, i); GEN(a); GEN(b); } while (0)
#define gz(g, z) ga(g, sz(z))
#define h(h) gg(helper_0, h)
#define hh(h, a) ggg(helper_1, h, a)
#define hhh(h, a, b) gggg(helper_2, h, a, b)
#define h_read(h, z) do { g_addr(); gg_here(helper_read##z, h##z); } while (0)
#define h_write(h, z) do { g_addr(); gg_here(helper_write##z, h##z); } while (0)
#define gg_here(g, a) ggg(g, a, saved_ip)
#define UNDEFINED do { gg_here(interrupt, INT_UNDEFINED); return false; } while (0)
#define SEGFAULT do { gg_here(interrupt, INT_GPF); return false; } while (0)

static inline int sz(int size) {
    switch (size) {
        case 8: return size_8;
        case 16: return size_16;
        case 32: return size_32;
        default: return -1;
    }
}

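// Emit the gadgets that compute a modrm effective address: the base register
// (or a bare displacement when there is no base), an optional scaled index,
// and an optional gs segment override; the next memory-touching gadget picks
// the result up.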
bool gen_addr(struct gen_state *state, struct modrm *modrm, bool seg_gs, dword_t saved_ip) {
    if (modrm->base == reg_none)
        gg(addr_none, modrm->offset);
    else
        gag(addr, modrm->base, modrm->offset);
    if (modrm->type == modrm_mem_si)
        ga(si, modrm->index * 4 + modrm->shift);
    if (seg_gs)
        g(seg_gs);
    return true;
}
#define g_addr() gen_addr(state, &modrm, seg_gs, saved_ip)

// this really wants to use all the locals of the decoder, which we can do
// really nicely in gcc using nested functions, but that won't work in clang,
// so we explicitly pass 500 arguments. sorry for the mess
static inline bool gen_op(struct gen_state *state, gadget_t *gadgets, enum arg arg, struct modrm *modrm, uint64_t *imm, int size, dword_t saved_ip, bool seg_gs, dword_t addr_offset) {
    size = sz(size);
    gadgets = gadgets + size * arg_count;

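    // Canonicalize the pseudo-args that no gadget implements directly: modrm
    // operands collapse to a plain register arg or arg_mem, a moffs-style
    // absolute address becomes arg_mem with no base register, and the
    // implicit 1 used by shifts becomes an immediate.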
    switch (arg) {
        case arg_modrm_reg:
            // TODO find some way to assert that this won't overflow?
            arg = modrm->reg + arg_reg_a; break;
        case arg_modrm_val:
            if (modrm->type == modrm_reg)
                arg = modrm->base + arg_reg_a;
            else
                arg = arg_mem;
            break;
        case arg_mem_addr:
            arg = arg_mem;
            modrm->type = modrm_mem;
            modrm->base = reg_none;
            modrm->offset = addr_offset;
            break;
        case arg_1:
            arg = arg_imm;
            *imm = 1;
            break;
    }
    if (arg >= arg_count || gadgets[arg] == NULL) {
        UNDEFINED;
    }
    if (arg == arg_mem || arg == arg_addr) {
        if (!gen_addr(state, modrm, seg_gs, saved_ip))
            return false;
    }
    GEN(gadgets[arg]);
    if (arg == arg_imm)
        GEN(*imm);
    else if (arg == arg_mem)
        GEN(saved_ip);
    return true;
}
#define op(type, thing, z) do { \
    extern gadget_t type##_gadgets[]; \
    if (!gen_op(state, type##_gadgets, arg_##thing, &modrm, &imm, z, saved_ip, seg_gs, addr_offset)) return false; \
} while (0)

#define load(thing, z) op(load, thing, z)
#define store(thing, z) op(store, thing, z)
// load-op-store
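// e.g. ADD(modrm_reg, modrm_val, 32) expands to load(modrm_val, 32);
// op(add, modrm_reg, 32); store(modrm_val, 32): load the destination into the
// temporary, combine it with the source, and write it back.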
#define los(o, src, dst, z) load(dst, z); op(o, src, z); store(dst, z)
#define lo(o, src, dst, z) load(dst, z); op(o, src, z)

#define MOV(src, dst,z) load(src, z); store(dst, z)
#define MOVZX(src, dst,zs,zd) load(src, zs); gz(zero_extend, zs); store(dst, zd)
#define MOVSX(src, dst,zs,zd) load(src, zs); gz(sign_extend, zs); store(dst, zd)
// xchg must generate in this order to be atomic
#define XCHG(src, dst,z) load(src, z); op(xchg, dst, z); store(src, z)

#define ADD(src, dst,z) los(add, src, dst, z)
#define OR(src, dst,z) los(or, src, dst, z)
#define ADC(src, dst,z) los(adc, src, dst, z)
#define SBB(src, dst,z) los(sbb, src, dst, z)
#define AND(src, dst,z) los(and, src, dst, z)
#define SUB(src, dst,z) los(sub, src, dst, z)
#define XOR(src, dst,z) los(xor, src, dst, z)
#define CMP(src, dst,z) lo(sub, src, dst, z)
#define TEST(src, dst,z) lo(and, src, dst, z)
#define NOT(val,z) load(val,z); gz(not, z); store(val,z)
#define NEG(val,z) imm = 0; load(imm,z); op(sub, val,z); store(val,z)

#define POP(thing,z) gg(pop, saved_ip); store(thing, z)
#define PUSH(thing,z) load(thing, z); gg(push, saved_ip)

#define INC(val,z) load(val, z); gz(inc, z); store(val, z)
#define DEC(val,z) load(val, z); gz(dec, z); store(val, z)

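// Branch targets are emitted as the target ip with the high bit set, which
// appears to distinguish a not-yet-chained target from a direct pointer to
// another block's code. jump_ips() records which code words hold these
// targets so gen_end() can expose them as the block's jump_ip pointers.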
#define fake_ip (state->ip | (1ul << 63))

#define jump_ips(off1, off2) \
    state->jump_ip[0] = state->size + off1; \
    if (off2 != 0) \
        state->jump_ip[1] = state->size + off2
#define JMP(loc) load(loc, OP_SIZE); g(jmp_indir); end_block = true
#define JMP_REL(off) gg(jmp, fake_ip + off); jump_ips(-1, 0); end_block = true
#define JCXZ_REL(off) ggg(jcxz, fake_ip + off, fake_ip); jump_ips(-2, -1); end_block = true
#define jcc(cc, to, else) gagg(jmp, cond_##cc, to, else); jump_ips(-2, -1); end_block = true
#define J_REL(cc, off) jcc(cc, fake_ip + off, fake_ip)
#define JN_REL(cc, off) jcc(cc, fake_ip, fake_ip + off)

// saved_ip: for use with page fault handler;
// -1: will be patched to block address in gen_end();
// fake_ip: the first one is the return address, used for saving to stack and verifying the cached ip in return cache is correct;
// fake_ip: the second one is the return target, patchable by return chaining.
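// so the five words emitted by ggggg() below are laid out as:
//   [gadget_call_indir][saved_ip][block ptr, patched in gen_end][return address][return target]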
#define CALL(loc) do { \
    load(loc, OP_SIZE); \
    ggggg(call_indir, saved_ip, -1, fake_ip, fake_ip); \
    state->block_patch_ip = state->size - 3; \
    jump_ips(-1, 0); \
    end_block = true; \
} while (0)
// the first four arguments are the same as for CALL,
// the last one is the call target, patchable by return chaining.
#define CALL_REL(off) do { \
    gggggg(call, saved_ip, -1, fake_ip, fake_ip, fake_ip + off); \
    state->block_patch_ip = state->size - 4; \
    jump_ips(-2, -1); \
    end_block = true; \
} while (0)
#define RET_NEAR(imm) ggg(ret, saved_ip, 4 + imm); end_block = true
#define INT(code) ggg(interrupt, (uint8_t) code, state->ip); end_block = true

#define SET(cc, dst) ga(set, cond_##cc); store(dst, 8)
#define SETN(cc, dst) ga(setn, cond_##cc); store(dst, 8)
// wins the prize for the most annoying instruction to generate
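// cmov is generated with a skip gadget: skip/skipn take a byte count saying
// how far to jump ahead in the code array when the condition calls for doing
// nothing. The count isn't known until the load/store sequence has been
// emitted, so a placeholder 0 goes in first and is patched afterwards.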
#define CMOV(cc, src, dst,z) do { \
    gag(skipn, cond_##cc, 0); \
    int start = state->size; \
    load(src, z); store(dst, z); \
    state->block->code[start - 1] = (state->size - start) * sizeof(long); \
} while (0)
#define CMOVN(cc, src, dst,z) do { \
    gag(skip, cond_##cc, 0); \
    int start = state->size; \
    load(src, z); store(dst, z); \
    state->block->code[start - 1] = (state->size - start) * sizeof(long); \
} while (0)

#define PUSHF() g(pushf)
#define POPF() g(popf)
#define SAHF g(sahf)
#define CLD g(cld)
#define STD g(std)

#define MUL18(val,z) MUL1(val,z)
#define MUL1(val,z) load(val, z); gz(mul, z)
#define IMUL1(val,z) load(val, z); gz(imul1, z)
#define DIV(val, z) load(val, z); gz(div, z)
#define IDIV(val, z) load(val, z); gz(idiv, z)
#define IMUL3(times, src, dst,z) load(src, z); op(imul, times, z); store(dst, z)
#define IMUL2(val, reg,z) IMUL3(val, reg, reg, z)

#define CVT ga(cvt, sz(oz))
#define CVTE ga(cvte, sz(oz))

#define ROL(count, val,z) los(rol, count, val, z)
#define ROR(count, val,z) los(ror, count, val, z)
#define RCL(count, val,z) los(rcl, count, val, z)
#define RCR(count, val,z) los(rcr, count, val, z)
#define SHL(count, val,z) los(shl, count, val, z)
#define SHR(count, val,z) los(shr, count, val, z)
#define SAR(count, val,z) los(sar, count, val, z)

#define SHLD(count, extra, dst,z) \
    load(dst,z); \
    if (arg_##count == arg_reg_c) op(shld_cl, extra,z); \
    else { op(shld_imm, extra,z); GEN(imm); } \
    store(dst,z)
#define SHRD(count, extra, dst,z) \
    load(dst,z); \
    if (arg_##count == arg_reg_c) op(shrd_cl, extra,z); \
    else { op(shrd_imm, extra,z); GEN(imm); } \
    store(dst,z)

#define BT(bit, val,z) lo(bt, val, bit, z)
#define BTC(bit, val,z) lo(btc, val, bit, z)
#define BTS(bit, val,z) lo(bts, val, bit, z)
#define BTR(bit, val,z) lo(btr, val, bit, z)
#define BSF(src, dst,z) los(bsf, src, dst, z)
#define BSR(src, dst,z) los(bsr, src, dst, z)

#define BSWAP(dst) ga(bswap, arg_##dst)

#define strop(op, rep, z) gag(op, sz(z) * size_count + rep_##rep, saved_ip)
#define STR(op, z) strop(op, once, z)
#define REP(op, z) strop(op, rep, z)
#define REPZ(op, z) strop(op, repz, z)
#define REPNZ(op, z) strop(op, repnz, z)

#define CMPXCHG(src, dst,z) load(src, z); op(cmpxchg, dst, z)
#define CMPXCHG8B(dst,z) g_addr(); gg(cmpxchg8b, saved_ip)
#define XADD(src, dst,z) XCHG(src, dst,z); ADD(src, dst,z)

void helper_rdtsc(struct cpu_state *cpu);
#define RDTSC h(helper_rdtsc)
#define CPUID() g(cpuid)

// atomic
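// Lock-prefixed instructions go through atomic_* gadgets that do the whole
// read-modify-write in one step instead of the separate load/op/store used
// above, so other threads can't observe a torn update.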
#define atomic_op(type, src, dst,z) load(src, z); op(atomic_##type, dst, z)
#define ATOMIC_ADD(src, dst,z) atomic_op(add, src, dst, z)
#define ATOMIC_OR(src, dst,z) atomic_op(or, src, dst, z)
#define ATOMIC_ADC(src, dst,z) atomic_op(adc, src, dst, z)
#define ATOMIC_SBB(src, dst,z) atomic_op(sbb, src, dst, z)
#define ATOMIC_AND(src, dst,z) atomic_op(and, src, dst, z)
#define ATOMIC_SUB(src, dst,z) atomic_op(sub, src, dst, z)
#define ATOMIC_XOR(src, dst,z) atomic_op(xor, src, dst, z)
#define ATOMIC_INC(val,z) op(atomic_inc, val, z)
#define ATOMIC_DEC(val,z) op(atomic_dec, val, z)
#define ATOMIC_CMPXCHG(src, dst,z) atomic_op(cmpxchg, src, dst, z)
#define ATOMIC_XADD(src, dst,z) load(src, z); op(atomic_xadd, dst, z); store(src, z)
#define ATOMIC_BTC(bit, val,z) lo(atomic_btc, val, bit, z)
#define ATOMIC_BTS(bit, val,z) lo(atomic_bts, val, bit, z)
#define ATOMIC_BTR(bit, val,z) lo(atomic_btr, val, bit, z)
#define ATOMIC_CMPXCHG8B(dst,z) g_addr(); gg(atomic_cmpxchg8b, saved_ip)

// fpu
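// The x87 stack isn't jitted as gadget arithmetic; each instruction calls a C
// helper (the fpu_* functions declared via emu/fpu.h above) through h/hh/hhh.
// The h_read/h_write forms generate the effective address first for helpers
// that take a memory operand; st_i is the stack-register number taken from
// the modrm byte's rm field.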
#define st_0 0
#define st_i modrm.rm_opcode
#define FLD() hh(fpu_ld, st_i);
#define FILD(val,z) h_read(fpu_ild, z)
#define FLDM(val,z) h_read(fpu_ldm, z)
#define FSTM(dst,z) h_write(fpu_stm, z)
#define FIST(dst,z) h_write(fpu_ist, z)
#define FXCH() hh(fpu_xch, st_i)
#define FCOM() hh(fpu_com, st_i)
#define FCOMM(val,z) h_read(fpu_comm, z)
#define FICOM(val,z) h_read(fpu_icom, z)
#define FUCOM() hh(fpu_ucom, st_i)
#define FUCOMI() hh(fpu_ucomi, st_i)
#define FCOMI() hh(fpu_comi, st_i)
#define FTST() h(fpu_tst)
#define FXAM() h(fpu_xam)
#define FST() hh(fpu_st, st_i)
#define FCHS() h(fpu_chs)
#define FABS() h(fpu_abs)
#define FLDC(what) hh(fpu_ldc, fconst_##what)
#define FPREM() h(fpu_prem)
#define FRNDINT() h(fpu_rndint)
#define FSCALE() h(fpu_scale)
#define FSQRT() h(fpu_sqrt)
#define FYL2X() h(fpu_yl2x)
#define F2XM1() h(fpu_2xm1)
#define FSTSW(dst) if (arg_##dst == arg_reg_a) g(fstsw_ax); else UNDEFINED
#define FSTCW(dst) if (arg_##dst == arg_reg_a) UNDEFINED; else h_write(fpu_stcw, 16)
#define FLDCW(dst) if (arg_##dst == arg_reg_a) UNDEFINED; else h_read(fpu_ldcw, 16)
#define FSTENV(val,z) h_write(fpu_stenv, z)
#define FLDENV(val,z) h_read(fpu_ldenv, z)
#define FSAVE(val,z) h_write(fpu_save, z)
#define FRESTORE(val,z) h_read(fpu_restore, z)
#define FPOP h(fpu_pop)
#define FINCSTP() h(fpu_incstp)
#define FADD(src, dst) hhh(fpu_add, src, dst)
#define FIADD(val,z) h_read(fpu_iadd, z)
#define FADDM(val,z) h_read(fpu_addm, z)
#define FSUB(src, dst) hhh(fpu_sub, src, dst)
#define FSUBM(val,z) h_read(fpu_subm, z)
#define FISUB(val,z) h_read(fpu_isub, z)
#define FISUBR(val,z) h_read(fpu_isubr, z)
#define FSUBR(src, dst) hhh(fpu_subr, src, dst)
#define FSUBRM(val,z) h_read(fpu_subrm, z)
#define FMUL(src, dst) hhh(fpu_mul, src, dst)
#define FIMUL(val,z) h_read(fpu_imul, z)
#define FMULM(val,z) h_read(fpu_mulm, z)
#define FDIV(src, dst) hhh(fpu_div, src, dst)
#define FIDIV(val,z) h_read(fpu_idiv, z)
#define FDIVM(val,z) h_read(fpu_divm, z)
#define FDIVR(src, dst) hhh(fpu_divr, src, dst)
#define FIDIVR(val,z) h_read(fpu_idivr, z)
#define FDIVRM(val,z) h_read(fpu_divrm, z)
#define FPATAN() h(fpu_patan)
#define FSIN() h(fpu_sin)
#define FCOS() h(fpu_cos)

// vector
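// SSE/MMX likewise runs through C helpers (the vec_* functions, presumably
// declared via emu/sse.h above). gen_vec() picks a glue gadget based on where
// the two operands live (xmm register, general register, memory, or
// immediate) and emits the helper pointer plus packed register offsets as its
// arguments.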
// sync with VEC_ARG_LIST
enum vec_arg {
    vec_arg_xmm, vec_arg_reg, vec_arg_imm, vec_arg_count,
    vec_arg_mem,
};

static inline enum vec_arg vecarg(enum arg arg, struct modrm *modrm) {
    switch (arg) {
        case arg_modrm_reg:
            return vec_arg_reg;
        case arg_imm:
            return vec_arg_imm;
        case arg_xmm_modrm_reg:
            return vec_arg_xmm;
        case arg_modrm_val:
            if (modrm->type == modrm_reg)
                return vec_arg_reg;
            return vec_arg_mem;
        case arg_xmm_modrm_val:
            if (modrm->type == modrm_reg)
                return vec_arg_xmm;
            return vec_arg_mem;
        default:
            die("unimplemented vecarg");
    }
}

static inline bool gen_vec(enum arg rm, enum arg reg, void (*helper)(), gadget_t (*helper_gadgets_mem)[vec_arg_count], struct gen_state *state, struct modrm *modrm, uint8_t imm, dword_t saved_ip, bool seg_gs) {
    enum vec_arg v_reg = vecarg(reg, modrm);
    enum vec_arg v_rm = vecarg(rm, modrm);

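    // Memory operands use the per-helper load/store gadget table passed in by
    // the caller; register-register forms all share the generic
    // vec_helper_reg table, indexed by the kinds of the two operands.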
    gadget_t gadget;
    if (v_rm == vec_arg_mem) {
        gadget = (*helper_gadgets_mem)[v_reg];
    } else {
        extern gadget_t vec_helper_reg_gadgets[vec_arg_count][vec_arg_count];
        gadget = vec_helper_reg_gadgets[v_reg][v_rm];
    }
    if (gadget == NULL) {
        UNDEFINED;
    }

    switch (v_rm) {
        case vec_arg_xmm:
            GEN(gadget);
            GEN(helper);
            GEN((modrm->opcode * sizeof(union xmm_reg))
                    | (modrm->rm_opcode * sizeof(union xmm_reg) << 8));
            break;

        case vec_arg_mem:
            gen_addr(state, modrm, seg_gs, saved_ip);
            GEN(gadget);
            GEN(saved_ip);
            GEN(helper);
            GEN(modrm->opcode * sizeof(union xmm_reg));
            break;

        case vec_arg_imm:
            // TODO: support immediates and opcode
            GEN(gadget);
            GEN(helper);
            GEN((modrm->rm_opcode * sizeof(union xmm_reg))
                    | (((uint16_t) imm) << 8));
            break;

        default: die("unimplemented vecarg");
    }
    return true;
}

#define _v(src, dst, helper, helper_gadgets, z) do { \
    extern gadget_t helper_gadgets[vec_arg_count]; \
    if (!gen_vec(src, dst, (void (*)()) helper, &helper_gadgets, state, &modrm, 0, saved_ip, seg_gs)) return false; \
} while (0)
#define _v_imm(imm, dst, helper, helper_gadgets, z) do { \
    extern gadget_t helper_gadgets[vec_arg_count]; \
    if (!gen_vec(arg_imm, dst, (void (*)()) helper, &helper_gadgets, state, &modrm, imm, saved_ip, seg_gs)) return false; \
} while (0)
#define v(op, src, dst,z) _v(arg_##src, arg_##dst, vec_##op##z, vec_helper_load##z##_gadgets, z)
#define v_imm(op, imm, dst,z) _v_imm(imm, arg_##dst, vec_##op##z, vec_helper_load##z##_gadgets, z)
#define v_write(op, src, dst,z) _v(arg_##dst, arg_##src, vec_##op##z, vec_helper_store##z##_gadgets, z)

#define VLOAD(src, dst,z) v(load, src, dst,z)
#define VZLOAD(src, dst,z) v(zload, src, dst, z)
#define VLOAD_PADNOTMEM(src, dst, z) do { \
    if (arg_##src == arg_xmm_modrm_val && modrm.type != modrm_mem) { \
        VZLOAD(src, dst, z); \
    } else { \
        VLOAD(src, dst, z); \
    } \
} while (0)
#define VLOAD_PADMEM(src, dst, z) do { \
    if (arg_##src == arg_xmm_modrm_val && modrm.type != modrm_reg) { \
        VZLOAD(src, dst, z); \
    } else { \
        VLOAD(src, dst, z); \
    } \
} while (0)
#define VSTORE(src, dst,z) v_write(store, src, dst,z)
#define VCOMPARE(src, dst,z) v(compare, src, dst,z)
#define VSHIFTR_IMM(reg, amount, z) v_imm(imm_shiftr, amount, reg,z)
#define VXOR(src, dst,z) v(xor, src, dst,z)

#define DECODER_RET int
#define DECODER_NAME gen_step
#define DECODER_ARGS struct gen_state *state, struct tlb *tlb
#define DECODER_PASS_ARGS state, tlb

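// Instantiate the decoder twice, once per operand size; decode.h uses the
// DECODER_* defines above to name and parameterize the function it generates.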
#define OP_SIZE 32
#include "emu/decode.h"
#undef OP_SIZE
#define OP_SIZE 16
#include "emu/decode.h"
#undef OP_SIZE