cherry-pick TCG-related stable fixes for 7.2

When the "KVM hardware virtualization" checkbox is turned off in Proxmox VE, the TCG accelerator is used, so these fixes are relevant in that case. The first patch is included so that the others can be cherry-picked without changes.

Reported-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
2023-03-17 13:47:11 +01:00 · 2023-03-17 13:47:11 +01:00 · e752bbe5e2
commit e752bbe5e2
parent 018ef788b3
6 changed files with 691 additions and 0 deletions
--- a/debian/patches/extra/0024-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch
+++ b/debian/patches/extra/0024-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch
@ -0,0 +1,286 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Richard Henderson <richard.henderson@linaro.org>
 Date: Sat, 14 Jan 2023 13:05:41 -1000
 Subject: [PATCH] tests/tcg/i386: Introduce and use reg_t consistently
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
 Define reg_t based on the actual register width.
 Define the inlines using that type.  This will allow
 input registers to 32-bit insns to be set to 64-bit
 values on x86-64, which allows testing various edge cases.
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
 Message-Id: <20230114230542.3116013-2-richard.henderson@linaro.org>
 Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
 (cherry-picked from commit 5d62d6649cd367b5b4a3676e7514d2f9ca86cb03)
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
 tests/tcg/i386/test-i386-bmi2.c | 182 ++++++++++++++++----------------
 1 file changed, 93 insertions(+), 89 deletions(-)
 diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c
 index 5fadf47510..3c3ef85513 100644
 --- a/tests/tcg/i386/test-i386-bmi2.c
 +++ b/tests/tcg/i386/test-i386-bmi2.c
@@ -3,34 +3,40 @@
 #include <stdint.h>
 #include <stdio.h>
 +#ifdef __x86_64
 +typedef uint64_t reg_t;
 +#else
 +typedef uint32_t reg_t;
 +#endif
 +
 #define insn1q(name, arg0)                                                           \
 -static inline uint64_t name##q(uint64_t arg0)                                        \
 +static inline reg_t name##q(reg_t arg0)                                              \
 {                                                                                    \
 -    uint64_t result64;                                                               \
 +    reg_t result64;                                                                  \
     asm volatile (#name "q   %1, %0" : "=r"(result64) : "rm"(arg0));                 \
     return result64;                                                                 \
 }
 #define insn1l(name, arg0)                                                           \
 -static inline uint32_t name##l(uint32_t arg0)                                        \
 +static inline reg_t name##l(reg_t arg0)                                              \
 {                                                                                    \
 -    uint32_t result32;                                                               \
 +    reg_t result32;                                                                  \
     asm volatile (#name "l   %k1, %k0" : "=r"(result32) : "rm"(arg0));               \
     return result32;                                                                 \
 }
 #define insn2q(name, arg0, c0, arg1, c1)                                             \
 -static inline uint64_t name##q(uint64_t arg0, uint64_t arg1)                         \
 +static inline reg_t name##q(reg_t arg0, reg_t arg1)                                  \
 {                                                                                    \
 -    uint64_t result64;                                                               \
 +    reg_t result64;                                                                  \
     asm volatile (#name "q   %2, %1, %0" : "=r"(result64) : c0(arg0), c1(arg1));     \
     return result64;                                                                 \
 }
 #define insn2l(name, arg0, c0, arg1, c1)                                             \
 -static inline uint32_t name##l(uint32_t arg0, uint32_t arg1)                         \
 +static inline reg_t name##l(reg_t arg0, reg_t arg1)                                  \
 {                                                                                    \
 -    uint32_t result32;                                                               \
 +    reg_t result32;                                                                  \
     asm volatile (#name "l   %k2, %k1, %k0" : "=r"(result32) : c0(arg0), c1(arg1));  \
     return result32;                                                                 \
 }
@@ -65,130 +71,128 @@ insn1l(blsr, src)
 int main(int argc, char *argv[]) {
     uint64_t ehlo = 0x202020204f4c4845ull;
     uint64_t mask = 0xa080800302020001ull;
 -    uint32_t result32;
 +    reg_t result;
 #ifdef __x86_64
 -    uint64_t result64;
 -
     /* 64 bits */
 -    result64 = andnq(mask, ehlo);
 -    assert(result64 == 0x002020204d4c4844);
 +    result = andnq(mask, ehlo);
 +    assert(result == 0x002020204d4c4844);
 -    result64 = pextq(ehlo, mask);
 -    assert(result64 == 133);
 +    result = pextq(ehlo, mask);
 +    assert(result == 133);
 -    result64 = pdepq(result64, mask);
 -    assert(result64 == (ehlo & mask));
 +    result = pdepq(result, mask);
 +    assert(result == (ehlo & mask));
 -    result64 = pextq(-1ull, mask);
 -    assert(result64 == 511); /* mask has 9 bits set */
 +    result = pextq(-1ull, mask);
 +    assert(result == 511); /* mask has 9 bits set */
 -    result64 = pdepq(-1ull, mask);
 -    assert(result64 == mask);
 +    result = pdepq(-1ull, mask);
 +    assert(result == mask);
 -    result64 = bextrq(mask, 0x3f00);
 -    assert(result64 == (mask & ~INT64_MIN));
 +    result = bextrq(mask, 0x3f00);
 +    assert(result == (mask & ~INT64_MIN));
 -    result64 = bextrq(mask, 0x1038);
 -    assert(result64 == 0xa0);
 +    result = bextrq(mask, 0x1038);
 +    assert(result == 0xa0);
 -    result64 = bextrq(mask, 0x10f8);
 -    assert(result64 == 0);
 +    result = bextrq(mask, 0x10f8);
 +    assert(result == 0);
 -    result64 = blsiq(0x30);
 -    assert(result64 == 0x10);
 +    result = blsiq(0x30);
 +    assert(result == 0x10);
 -    result64 = blsiq(0x30ull << 32);
 -    assert(result64 == 0x10ull << 32);
 +    result = blsiq(0x30ull << 32);
 +    assert(result == 0x10ull << 32);
 -    result64 = blsmskq(0x30);
 -    assert(result64 == 0x1f);
 +    result = blsmskq(0x30);
 +    assert(result == 0x1f);
 -    result64 = blsrq(0x30);
 -    assert(result64 == 0x20);
 +    result = blsrq(0x30);
 +    assert(result == 0x20);
 -    result64 = blsrq(0x30ull << 32);
 -    assert(result64 == 0x20ull << 32);
 +    result = blsrq(0x30ull << 32);
 +    assert(result == 0x20ull << 32);
 -    result64 = bzhiq(mask, 0x3f);
 -    assert(result64 == (mask & ~INT64_MIN));
 +    result = bzhiq(mask, 0x3f);
 +    assert(result == (mask & ~INT64_MIN));
 -    result64 = bzhiq(mask, 0x1f);
 -    assert(result64 == (mask & ~(-1 << 30)));
 +    result = bzhiq(mask, 0x1f);
 +    assert(result == (mask & ~(-1 << 30)));
 -    result64 = rorxq(0x2132435465768798, 8);
 -    assert(result64 == 0x9821324354657687);
 +    result = rorxq(0x2132435465768798, 8);
 +    assert(result == 0x9821324354657687);
 -    result64 = sarxq(0xffeeddccbbaa9988, 8);
 -    assert(result64 == 0xffffeeddccbbaa99);
 +    result = sarxq(0xffeeddccbbaa9988, 8);
 +    assert(result == 0xffffeeddccbbaa99);
 -    result64 = sarxq(0x77eeddccbbaa9988, 8 | 64);
 -    assert(result64 == 0x0077eeddccbbaa99);
 +    result = sarxq(0x77eeddccbbaa9988, 8 | 64);
 +    assert(result == 0x0077eeddccbbaa99);
 -    result64 = shrxq(0xffeeddccbbaa9988, 8);
 -    assert(result64 == 0x00ffeeddccbbaa99);
 +    result = shrxq(0xffeeddccbbaa9988, 8);
 +    assert(result == 0x00ffeeddccbbaa99);
 -    result64 = shrxq(0x77eeddccbbaa9988, 8 | 192);
 -    assert(result64 == 0x0077eeddccbbaa99);
 +    result = shrxq(0x77eeddccbbaa9988, 8 | 192);
 +    assert(result == 0x0077eeddccbbaa99);
 -    result64 = shlxq(0xffeeddccbbaa9988, 8);
 -    assert(result64 == 0xeeddccbbaa998800);
 +    result = shlxq(0xffeeddccbbaa9988, 8);
 +    assert(result == 0xeeddccbbaa998800);
 #endif
     /* 32 bits */
 -    result32 = andnl(mask, ehlo);
 -    assert(result32 == 0x04d4c4844);
 +    result = andnl(mask, ehlo);
 +    assert(result == 0x04d4c4844);
 -    result32 = pextl((uint32_t) ehlo, mask);
 -    assert(result32 == 5);
 +    result = pextl((uint32_t) ehlo, mask);
 +    assert(result == 5);
 -    result32 = pdepl(result32, mask);
 -    assert(result32 == (uint32_t)(ehlo & mask));
 +    result = pdepl(result, mask);
 +    assert(result == (uint32_t)(ehlo & mask));
 -    result32 = pextl(-1u, mask);
 -    assert(result32 == 7); /* mask has 3 bits set */
 +    result = pextl(-1u, mask);
 +    assert(result == 7); /* mask has 3 bits set */
 -    result32 = pdepl(-1u, mask);
 -    assert(result32 == (uint32_t)mask);
 +    result = pdepl(-1u, mask);
 +    assert(result == (uint32_t)mask);
 -    result32 = bextrl(mask, 0x1f00);
 -    assert(result32 == (mask & ~INT32_MIN));
 +    result = bextrl(mask, 0x1f00);
 +    assert(result == (mask & ~INT32_MIN));
 -    result32 = bextrl(ehlo, 0x1018);
 -    assert(result32 == 0x4f);
 +    result = bextrl(ehlo, 0x1018);
 +    assert(result == 0x4f);
 -    result32 = bextrl(mask, 0x1038);
 -    assert(result32 == 0);
 +    result = bextrl(mask, 0x1038);
 +    assert(result == 0);
 -    result32 = blsil(0xffff);
 -    assert(result32 == 1);
 +    result = blsil(0xffff);
 +    assert(result == 1);
 -    result32 = blsmskl(0x300);
 -    assert(result32 == 0x1ff);
 +    result = blsmskl(0x300);
 +    assert(result == 0x1ff);
 -    result32 = blsrl(0xffc);
 -    assert(result32 == 0xff8);
 +    result = blsrl(0xffc);
 +    assert(result == 0xff8);
 -    result32 = bzhil(mask, 0xf);
 -    assert(result32 == 1);
 +    result = bzhil(mask, 0xf);
 +    assert(result == 1);
 -    result32 = rorxl(0x65768798, 8);
 -    assert(result32 == 0x98657687);
 +    result = rorxl(0x65768798, 8);
 +    assert(result == 0x98657687);
 -    result32 = sarxl(0xffeeddcc, 8);
 -    assert(result32 == 0xffffeedd);
 +    result = sarxl(0xffeeddcc, 8);
 +    assert(result == 0xffffeedd);
 -    result32 = sarxl(0x77eeddcc, 8 | 32);
 -    assert(result32 == 0x0077eedd);
 +    result = sarxl(0x77eeddcc, 8 | 32);
 +    assert(result == 0x0077eedd);
 -    result32 = shrxl(0xffeeddcc, 8);
 -    assert(result32 == 0x00ffeedd);
 +    result = shrxl(0xffeeddcc, 8);
 +    assert(result == 0x00ffeedd);
 -    result32 = shrxl(0x77eeddcc, 8 | 128);
 -    assert(result32 == 0x0077eedd);
 +    result = shrxl(0x77eeddcc, 8 | 128);
 +    assert(result == 0x0077eedd);
 -    result32 = shlxl(0xffeeddcc, 8);
 -    assert(result32 == 0xeeddcc00);
 +    result = shlxl(0xffeeddcc, 8);
 +    assert(result == 0xeeddcc00);
     return 0;
 }
--- a/debian/patches/extra/0025-target-i386-Fix-BEXTR-instruction.patch
+++ b/debian/patches/extra/0025-target-i386-Fix-BEXTR-instruction.patch
@ -0,0 +1,97 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Richard Henderson <richard.henderson@linaro.org>
 Date: Sat, 14 Jan 2023 13:05:42 -1000
 Subject: [PATCH] target/i386: Fix BEXTR instruction
 There were two problems here: not limiting the input to operand bits,
 and not correctly handling large extraction length.
 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1372
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 Message-Id: <20230114230542.3116013-3-richard.henderson@linaro.org>
 Cc: qemu-stable@nongnu.org
 Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18)
 Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
 (cherry-picked from commit b14c0098975264ed03144f145bca0179a6763a07)
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
 target/i386/tcg/emit.c.inc      | 22 +++++++++++-----------
 tests/tcg/i386/test-i386-bmi2.c | 12 ++++++++++++
 2 files changed, 23 insertions(+), 11 deletions(-)
 diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
 index 7037ff91c6..99f6ba6e19 100644
 --- a/target/i386/tcg/emit.c.inc
 +++ b/target/i386/tcg/emit.c.inc
@@ -1078,30 +1078,30 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 -    TCGv bound, zero;
 +    TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
 +    TCGv zero = tcg_constant_tl(0);
 +    TCGv mone = tcg_constant_tl(-1);
     /*
      * Extract START, and shift the operand.
      * Shifts larger than operand size get zeros.
      */
     tcg_gen_ext8u_tl(s->A0, s->T1);
 +    if (TARGET_LONG_BITS == 64 && ot == MO_32) {
 +        tcg_gen_ext32u_tl(s->T0, s->T0);
 +    }
     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
 -    bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
 -    zero = tcg_constant_tl(0);
     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero);
     /*
 -     * Extract the LEN into a mask.  Lengths larger than
 -     * operand size get all ones.
 +     * Extract the LEN into an inverse mask.  Lengths larger than
 +     * operand size get all zeros, length 0 gets all ones.
      */
     tcg_gen_extract_tl(s->A0, s->T1, 8, 8);
 -    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound);
 -
 -    tcg_gen_movi_tl(s->T1, 1);
 -    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
 -    tcg_gen_subi_tl(s->T1, s->T1, 1);
 -    tcg_gen_and_tl(s->T0, s->T0, s->T1);
 +    tcg_gen_shl_tl(s->T1, mone, s->A0);
 +    tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero);
 +    tcg_gen_andc_tl(s->T0, s->T0, s->T1);
     gen_op_update1_cc(s);
     set_cc_op(s, CC_OP_LOGICB + ot);
 diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c
 index 3c3ef85513..982d4abda4 100644
 --- a/tests/tcg/i386/test-i386-bmi2.c
 +++ b/tests/tcg/i386/test-i386-bmi2.c
@@ -99,6 +99,9 @@ int main(int argc, char *argv[]) {
     result = bextrq(mask, 0x10f8);
     assert(result == 0);
 +    result = bextrq(0xfedcba9876543210ull, 0x7f00);
 +    assert(result == 0xfedcba9876543210ull);
 +
     result = blsiq(0x30);
     assert(result == 0x10);
@@ -164,6 +167,15 @@ int main(int argc, char *argv[]) {
     result = bextrl(mask, 0x1038);
     assert(result == 0);
 +    result = bextrl((reg_t)0x8f635a775ad3b9b4ull, 0x3018);
 +    assert(result == 0x5a);
 +
 +    result = bextrl((reg_t)0xfedcba9876543210ull, 0x7f00);
 +    assert(result == 0x76543210u);
 +
 +    result = bextrl(-1, 0);
 +    assert(result == 0);
 +
     result = blsil(0xffff);
     assert(result == 1);
--- a/debian/patches/extra/0026-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch
+++ b/debian/patches/extra/0026-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch
@ -0,0 +1,47 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Richard Henderson <richard.henderson@linaro.org>
 Date: Sat, 14 Jan 2023 08:06:01 -1000
 Subject: [PATCH] target/i386: Fix C flag for BLSI, BLSMSK, BLSR
 We forgot to set cc_src, which is used for computing C.
 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1370
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 Message-Id: <20230114180601.2993644-1-richard.henderson@linaro.org>
 Cc: qemu-stable@nongnu.org
 Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18)
 Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
 (cherry-picked from commit 99282098dc74c2055bde5652bde6cf0067d0c370)
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
 target/i386/tcg/emit.c.inc | 3 +++
 1 file changed, 3 insertions(+)
 diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
 index 99f6ba6e19..4d7702c106 100644
 --- a/target/i386/tcg/emit.c.inc
 +++ b/target/i386/tcg/emit.c.inc
@@ -1111,6 +1111,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 +    tcg_gen_mov_tl(cpu_cc_src, s->T0);
     tcg_gen_neg_tl(s->T1, s->T0);
     tcg_gen_and_tl(s->T0, s->T0, s->T1);
     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
@@ -1121,6 +1122,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
 {
     MemOp ot = decode->op[0].ot;
 +    tcg_gen_mov_tl(cpu_cc_src, s->T0);
     tcg_gen_subi_tl(s->T1, s->T0, 1);
     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
@@ -1131,6 +1133,7 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 +    tcg_gen_mov_tl(cpu_cc_src, s->T0);
     tcg_gen_subi_tl(s->T1, s->T0, 1);
     tcg_gen_and_tl(s->T0, s->T0, s->T1);
     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
--- a/debian/patches/extra/0027-target-i386-fix-ADOX-followed-by-ADCX.patch
+++ b/debian/patches/extra/0027-target-i386-fix-ADOX-followed-by-ADCX.patch
@ -0,0 +1,192 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Paolo Bonzini <pbonzini@redhat.com>
 Date: Tue, 31 Jan 2023 09:48:03 +0100
 Subject: [PATCH] target/i386: fix ADOX followed by ADCX
 When ADCX is followed by ADOX or vice versa, the second instruction's
 carry comes from EFLAGS and the condition codes use the CC_OP_ADCOX
 operation.  Retrieving the carry from EFLAGS is handled by this bit
 of gen_ADCOX:
        tcg_gen_extract_tl(carry_in, cpu_cc_src,
            ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1);
 Unfortunately, in this case cc_op has been overwritten by the previous
 "if" statement to CC_OP_ADCOX.  This works by chance when the first
 instruction is ADCX; however, if the first instruction is ADOX,
 ADCX will incorrectly take its carry from OF instead of CF.
 Fix by moving the computation of the new cc_op at the end of the function.
 The included exhaustive test case fails without this patch and passes
 afterwards.
 Because ADCX/ADOX need not be invoked through the VEX prefix, this
 regression bisects to commit 16fc5726a6e2 ("target/i386: reimplement
 0x0f 0x38, add AVX", 2022-10-18).  However, the mistake happened a
 little earlier, when BMI instructions were rewritten using the new
 decoder framework.
 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1471
 Reported-by: Paul Jolly <https://gitlab.com/myitcv>
 Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18)
 Cc: qemu-stable@nongnu.org
 Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
 (cherry-picked from commit 60c7dd22e1383754d5f150bc9f7c2785c662a7b6)
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
 target/i386/tcg/emit.c.inc       | 20 +++++----
 tests/tcg/i386/Makefile.target   |  6 ++-
 tests/tcg/i386/test-i386-adcox.c | 75 ++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+), 10 deletions(-)
 create mode 100644 tests/tcg/i386/test-i386-adcox.c
 diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
 index 4d7702c106..0d7c6e80ae 100644
 --- a/target/i386/tcg/emit.c.inc
 +++ b/target/i386/tcg/emit.c.inc
@@ -1015,6 +1015,7 @@ VSIB_AVX(VPGATHERQ, vpgatherq)
 static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
 {
 +    int opposite_cc_op;
     TCGv carry_in = NULL;
     TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
     TCGv zero;
@@ -1022,14 +1023,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
     if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
         /* Re-use the carry-out from a previous round.  */
         carry_in = carry_out;
 -        cc_op = s->cc_op;
 -    } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) {
 -        /* Merge with the carry-out from the opposite instruction.  */
 -        cc_op = CC_OP_ADCOX;
 -    }
 -
 -    /* If we don't have a carry-in, get it out of EFLAGS.  */
 -    if (!carry_in) {
 +    } else {
 +        /* We don't have a carry-in, get it out of EFLAGS.  */
         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
             gen_compute_eflags(s);
         }
@@ -1053,7 +1048,14 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
         tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
         break;
     }
 -    set_cc_op(s, cc_op);
 +
 +    opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX;
 +    if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) {
 +        /* Merge with the carry-out from the opposite instruction.  */
 +        set_cc_op(s, CC_OP_ADCOX);
 +    } else {
 +        set_cc_op(s, cc_op);
 +    }
 }
 static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
 index 81831cafbc..bafd8c2180 100644
 --- a/tests/tcg/i386/Makefile.target
 +++ b/tests/tcg/i386/Makefile.target
@@ -14,7 +14,7 @@ config-cc.mak: Makefile
 I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c))
 ALL_X86_TESTS=$(I386_SRCS:.c=)
 SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx
 -X86_64_TESTS:=$(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
 +X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
 test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse
 run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max
@@ -28,6 +28,10 @@ test-i386-bmi2: CFLAGS=-O2
 run-test-i386-bmi2: QEMU_OPTS += -cpu max
 run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max
 +test-i386-adcox: CFLAGS=-O2
 +run-test-i386-adcox: QEMU_OPTS += -cpu max
 +run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max
 +
 #
 # hello-i386 is a barebones app
 #
 diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c
 new file mode 100644
 index 0000000000..16169efff8
 --- /dev/null
 +++ b/tests/tcg/i386/test-i386-adcox.c
@@ -0,0 +1,75 @@
 +/* See if ADCX and ADOX give expected results when executed back to back */
 +#include <assert.h>
 +#include <stdint.h>
 +#include <stdio.h>
 +
 +#define CC_C 1
 +#define CC_O (1 << 11)
 +
 +#ifdef __x86_64__
 +#define REG uint64_t
 +#else
 +#define REG uint32_t
 +#endif
 +
 +void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand)
 +{
 +    REG flags;
 +    REG out_adcx, out_adox;
 +
 +    asm("pushf; pop %0" : "=r"(flags));
 +    flags &= ~(CC_C | CC_O);
 +    flags |= (in_c ? CC_C : 0);
 +    flags |= (in_o ? CC_O : 0);
 +
 +    out_adcx = adcx_operand;
 +    out_adox = adox_operand;
 +    asm("push %0; popf;"
 +        "adox %3, %2;"
 +        "adcx %3, %1;"
 +        "pushf; pop %0"
 +        : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
 +        : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
 +
 +    assert(out_adcx == in_c + adcx_operand - 1);
 +    assert(out_adox == in_o + adox_operand - 1);
 +    assert(!!(flags & CC_C) == (in_c || adcx_operand));
 +    assert(!!(flags & CC_O) == (in_o || adox_operand));
 +}
 +
 +void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand)
 +{
 +    REG flags;
 +    REG out_adcx, out_adox;
 +
 +    asm("pushf; pop %0" : "=r"(flags));
 +    flags &= ~(CC_C | CC_O);
 +    flags |= (in_c ? CC_C : 0);
 +    flags |= (in_o ? CC_O : 0);
 +
 +    out_adcx = adcx_operand;
 +    out_adox = adox_operand;
 +    asm("push %0; popf;"
 +        "adcx %3, %1;"
 +        "adox %3, %2;"
 +        "pushf; pop %0"
 +        : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
 +        : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
 +
 +    assert(out_adcx == in_c + adcx_operand - 1);
 +    assert(out_adox == in_o + adox_operand - 1);
 +    assert(!!(flags & CC_C) == (in_c || adcx_operand));
 +    assert(!!(flags & CC_O) == (in_o || adox_operand));
 +}
 +
 +int main(int argc, char *argv[]) {
 +    /* try all combinations of input CF, input OF, CF from op1+op2,  OF from op2+op1 */
 +    int i;
 +    for (i = 0; i <= 15; i++) {
 +        printf("%d\n", i);
 +        test_adcx_adox(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8));
 +        test_adox_adcx(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8));
 +    }
 +    return 0;
 +}
 +
--- a/debian/patches/extra/0028-target-i386-Fix-BZHI-instruction.patch
+++ b/debian/patches/extra/0028-target-i386-Fix-BZHI-instruction.patch
@ -0,0 +1,64 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Richard Henderson <richard.henderson@linaro.org>
 Date: Sat, 14 Jan 2023 13:32:06 -1000
 Subject: [PATCH] target/i386: Fix BZHI instruction
 We did not correctly handle N >= operand size.
 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1374
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 Message-Id: <20230114233206.3118472-1-richard.henderson@linaro.org>
 Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
 (cherry-picked from commit 9ad2ba6e8e7fc195d0dd0b76ab38bd2fceb1bdd4)
 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
 ---
 target/i386/tcg/emit.c.inc      | 14 +++++++-------
 tests/tcg/i386/test-i386-bmi2.c |  3 +++
 2 files changed, 10 insertions(+), 7 deletions(-)
 diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
 index 0d7c6e80ae..7296f3952c 100644
 --- a/target/i386/tcg/emit.c.inc
 +++ b/target/i386/tcg/emit.c.inc
@@ -1145,20 +1145,20 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 -    TCGv bound;
 +    TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
 +    TCGv zero = tcg_constant_tl(0);
 +    TCGv mone = tcg_constant_tl(-1);
 -    tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
 -    bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
 +    tcg_gen_ext8u_tl(s->T1, s->T1);
     /*
      * Note that since we're using BMILG (in order to get O
      * cleared) we need to store the inverse into C.
      */
 -    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, s->T1, bound);
 -    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, bound, bound, s->T1);
 +    tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound);
 -    tcg_gen_movi_tl(s->A0, -1);
 -    tcg_gen_shl_tl(s->A0, s->A0, s->T1);
 +    tcg_gen_shl_tl(s->A0, mone, s->T1);
 +    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero);
     tcg_gen_andc_tl(s->T0, s->T0, s->A0);
     gen_op_update1_cc(s);
 diff --git a/tests/tcg/i386/test-i386-bmi2.c b/tests/tcg/i386/test-i386-bmi2.c
 index 982d4abda4..0244df7987 100644
 --- a/tests/tcg/i386/test-i386-bmi2.c
 +++ b/tests/tcg/i386/test-i386-bmi2.c
@@ -123,6 +123,9 @@ int main(int argc, char *argv[]) {
     result = bzhiq(mask, 0x1f);
     assert(result == (mask & ~(-1 << 30)));
 +    result = bzhiq(mask, 0x40);
 +    assert(result == mask);
 +
     result = rorxq(0x2132435465768798, 8);
     assert(result == 0x9821324354657687);
--- a/debian/patches/series
+++ b/debian/patches/series
@ -21,6 +21,11 @@ extra/0020-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch
 extra/0021-memory-Allow-disabling-re-entrancy-checking-per-MR.patch
 extra/0022-lsi53c895a-disable-reentrancy-detection-for-script-R.patch
 extra/0023-acpi-cpuhp-fix-guest-visible-maximum-access-size-to-.patch
 extra/0024-tests-tcg-i386-Introduce-and-use-reg_t-consistently.patch
 extra/0025-target-i386-Fix-BEXTR-instruction.patch
 extra/0026-target-i386-Fix-C-flag-for-BLSI-BLSMSK-BLSR.patch
 extra/0027-target-i386-fix-ADOX-followed-by-ADCX.patch
 extra/0028-target-i386-Fix-BZHI-instruction.patch
 bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
 bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
 bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch