
# FP8 BRGEMM test set, part 1: data-type combinations, post-ops and BRGEMM
# attributes (interleaved stores enabled) applied to the int8 option set.
# NOTE(review): --reset presumably restores all driver options to their
# defaults so nothing carries over from an including file — confirm.
--reset

# Data-type triples (presumably src:wei:dst — TODO confirm).
# The first two entries pair identical f8 types with an f32 third type; the
# remaining four mix f8_e4m3 and f8_e5m2 and use an f8 third type.
--dt=\
    f8_e4m3:f8_e4m3:f32,\
    f8_e5m2:f8_e5m2:f32,\
    f8_e4m3:f8_e5m2:f8_e4m3,\
    f8_e4m3:f8_e5m2:f8_e5m2,\
    f8_e5m2:f8_e4m3:f8_e5m2,\
    f8_e5m2:f8_e4m3:f8_e4m3

# Three comma-separated post-op settings: an empty entry (presumably "no
# post-ops" — confirm), sum:2, and relu.
--attr-post-ops=,sum:2,relu

# Two attribute sets: use_uker:1 and use_uker:0, both with
# use_interleave_stores:1.
--brgemm-attr=use_uker:1+use_interleave_stores:1,use_uker:0+use_interleave_stores:1

--batch=option_set_int8 # fp8 uses int8 blocking for AMX


# FP8 emulation: disabling interleaved stores changes the size of the
# temporary data buffer, so the attribute sets below use
# use_interleave_stores:0 with both use_uker:1 and use_uker:0.
# NOTE(review): no --reset precedes this section, so the --dt and
# --attr-post-ops values set earlier in the file presumably still apply here —
# confirm against the driver's option-persistence rules.
--brgemm-attr=use_uker:1+use_interleave_stores:0,use_uker:0+use_interleave_stores:0
# Batch size is fixed to 1 for the shape batches that follow.
--bs=1
--batch=shapes_2d_no_tail_int8

--batch=shapes_2d_tail_n_int8
