diff --git a/ir.h b/ir.h index a45e6b7..ab48778 100644 --- a/ir.h +++ b/ir.h @@ -507,6 +507,8 @@ void ir_strtab_free(ir_strtab *strtab); #define IR_GEN_NATIVE (1<<19) #define IR_GEN_CODE (1<<20) /* C or LLVM */ +#define IR_GEN_CACHE_DEMOTE (1<<21) /* Demote the generated code from the closest CPU caches; only acted on when IR_X86_CLDEMOTE is also set in ctx->mflags */ + /* debug related */ #ifdef IR_DEBUG # define IR_DEBUG_SCCP (1<<27) @@ -837,14 +839,15 @@ int ir_patch(const void *code, size_t size, uint32_t jmp_table_size, const void /* CPU information (implementation in ir_cpuinfo.c) */ #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) -# define IR_X86_SSE2 (1<<0) -# define IR_X86_SSE3 (1<<1) -# define IR_X86_SSSE3 (1<<2) -# define IR_X86_SSE41 (1<<3) -# define IR_X86_SSE42 (1<<4) -# define IR_X86_AVX (1<<5) -# define IR_X86_AVX2 (1<<6) -# define IR_X86_BMI1 (1<<7) +# define IR_X86_SSE2 (1<<0) +# define IR_X86_SSE3 (1<<1) +# define IR_X86_SSSE3 (1<<2) +# define IR_X86_SSE41 (1<<3) +# define IR_X86_SSE42 (1<<4) +# define IR_X86_AVX (1<<5) +# define IR_X86_AVX2 (1<<6) +# define IR_X86_BMI1 (1<<7) +# define IR_X86_CLDEMOTE (1<<8) #endif uint32_t ir_cpuinfo(void); diff --git a/ir_cpuinfo.c b/ir_cpuinfo.c index 9e0abda..c746c38 100644 --- a/ir_cpuinfo.c +++ b/ir_cpuinfo.c @@ -47,6 +47,7 @@ uint32_t ir_cpuinfo(void) ir_cpuid_ex(info_0x7_0, 0x7, 0); if (bit(info_0x7_0[1], 5U)) ret |= IR_X86_AVX2; if (bit(info_0x7_0[1], 3U)) ret |= IR_X86_BMI1; + if (bit(info_0x7_0[2], 25U)) ret |= IR_X86_CLDEMOTE; /* CPUID leaf 7, subleaf 0: bit 25 of word [2] (presumably ECX - confirm against ir_cpuid_ex) */ #undef bit diff --git a/ir_x86.dasc b/ir_x86.dasc index 3a5b9a2..a0c63d4 100644 --- a/ir_x86.dasc +++ b/ir_x86.dasc @@ -9717,6 +9717,19 @@ next_block:; ir_mem_flush(entry, size); +#if defined(__GNUC__) + if ((ctx->flags & IR_GEN_CACHE_DEMOTE) && (ctx->mflags & IR_X86_CLDEMOTE)) { + uintptr_t start = (uintptr_t)entry; + uintptr_t p = (uintptr_t)start & ~0x3F; /* round the start address down to a 64-byte boundary; the loop below advances in 64-byte steps */ + + do { + /* Same as _cldemote(p): bytes 0f 1c 06 are the raw encoding of CLDEMOTE with the address taken from the S-constraint register (rSI/ESI). NOTE(review): do/while runs once even when size is 0 - confirm size is never 0 here. */ + asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p)); + p += 64; + } while (p < start + size); + } +#endif + if (ctx->code_buffer == NULL) { 
ir_mem_protect(entry, size); }