diff --git a/vidur-alibabacloud/.gitignore b/vidur-alibabacloud/.gitignore index 5e0a4d9a..9f6eaea5 100644 --- a/vidur-alibabacloud/.gitignore +++ b/vidur-alibabacloud/.gitignore @@ -185,11 +185,25 @@ example # > fth_test/ fth_run/ +fth-test-*/logs/ +fth-test-*/simulator_output/ +fth-test-*/.claude/ +examples/vidur-ali-scenarios/logs/ +examples/vidur-ali-scenarios/simulator_output/ SimAI/ core.* *.csv +!data/aicb_workload/*.csv +!data/aicb_workload/cache/ +!data/aicb_workload/cache/*.csv +!data/aicb_workload/cache/*.json # Added rule to ignore all *fth.py files *fth.py # Added rule to recursively ignore all *fth.py files in all directories **/*fth.py +# Personal dev notes / fth files +README-fth.md +**/README-fth.md +fth.sh +**/fth.sh diff --git a/vidur-alibabacloud/data/aicb_workload/a.csv b/vidur-alibabacloud/data/aicb_workload/a.csv new file mode 100644 index 00000000..a218eb7b --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/a.csv @@ -0,0 +1,298 @@ +HYBRID_TRANSFORMER_FWD_IN_BCKWD model_parallel_NPU_group: 8 ep: 32 pp: 0 vpp: 61 ga: 1 all_gpus: 32 checkpoints: 0 checkpoint_initiates: 0 pp_comm: 0 +296 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +dense_mlp -1 20698 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +dense_mlp -1 20698 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +dense_mlp -1 20698 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 
ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 
+moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 
NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 
151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 
100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 
NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 
ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 
+moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 +attention_layer -1 185513 ALLREDUCE 1835008 0 
NONE 0 0 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 262144 1 NONE 0 1 NONE 0 100 +moe_expert -1 151238 NONE 0 1 NONE 0 1 NONE 0 100 +moe_expert -1 1 ALLTOALL_EP 131072 1 NONE 0 1 NONE 0 100 +shared_experts -1 20698 NONE 0 0 NONE 0 0 NONE 0 100 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws16-tp8-pp1-ep16-bs1-seq100-prefill.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws16-tp8-pp1-ep16-bs1-seq100-prefill.json new file mode 100644 index 00000000..a358ce46 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws16-tp8-pp1-ep16-bs1-seq100-prefill.json @@ -0,0 +1,612 @@ +{ + "0": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32399.0, + "comm_size": 14336.0 + } + }, + "1": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32399.0, + "comm_size": 14336.0 + } + }, + "2": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32399.0, + "comm_size": 14336.0 + } + }, + "3": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "4": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "5": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "6": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "7": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "8": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "9": { + "attention": { + 
"comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "10": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "11": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "12": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "13": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "14": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "15": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "16": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "17": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "18": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "19": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "20": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "21": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "22": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + 
"comm_size": 21728.0 + } + }, + "23": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "24": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "25": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "26": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "27": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "28": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "29": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "30": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "31": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "32": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "33": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "34": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "35": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "36": { + "attention": { + "comp_time": 52948.0, + 
"comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "37": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "38": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "39": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "40": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "41": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "42": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "43": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "44": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "45": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "46": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "47": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "48": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "49": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } 
+ }, + "50": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "51": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "52": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "53": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "54": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "55": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "56": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "57": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "58": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "59": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "60": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws48-tp8-pp1-ep48-bs1-seq100-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws48-tp8-pp1-ep48-bs1-seq100-decode.json new file mode 100644 index 00000000..07509b83 --- /dev/null +++ 
b/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws48-tp8-pp1-ep48-bs1-seq100-decode.json @@ -0,0 +1,612 @@ +{ + "0": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32216.0, + "comm_size": 14336.0 + } + }, + "1": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32216.0, + "comm_size": 14336.0 + } + }, + "2": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32216.0, + "comm_size": 14336.0 + } + }, + "3": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "4": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "5": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "6": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "7": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "8": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "9": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "10": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "11": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "12": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + 
"comm_size": 21728.0 + } + }, + "13": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "14": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "15": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "16": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "17": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "18": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "19": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "20": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "21": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "22": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "23": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "24": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "25": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "26": { + "attention": { + "comp_time": 53006.0, + 
"comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "27": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "28": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "29": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "30": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "31": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "32": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "33": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "34": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "35": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "36": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "37": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "38": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "39": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } 
+ }, + "40": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "41": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "42": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "43": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "44": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "45": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "46": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "47": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "48": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "49": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "50": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "51": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "52": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "53": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + 
"moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "54": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "55": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "56": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "57": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "58": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "59": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + }, + "60": { + "attention": { + "comp_time": 53006.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156560.0, + "comm_size": 21728.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws48-tp8-pp1-ep48-bs1-seq106-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws48-tp8-pp1-ep48-bs1-seq106-decode.json new file mode 100644 index 00000000..782796ac --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws48-tp8-pp1-ep48-bs1-seq106-decode.json @@ -0,0 +1,612 @@ +{ + "0": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32274.0, + "comm_size": 14336.0 + } + }, + "1": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32274.0, + "comm_size": 14336.0 + } + }, + "2": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32274.0, + "comm_size": 14336.0 + } + }, + "3": { + "attention": { 
+ "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "4": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "5": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "6": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "7": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "8": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "9": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "10": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "11": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "12": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "13": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "14": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "15": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "16": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + 
"comm_size": 21728.0 + } + }, + "17": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "18": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "19": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "20": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "21": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "22": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "23": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "24": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "25": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "26": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "27": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "28": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "29": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "30": { + "attention": { + "comp_time": 52756.0, + 
"comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "31": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "32": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "33": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "34": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "35": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "36": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "37": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "38": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "39": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "40": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "41": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "42": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "43": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } 
+ }, + "44": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "45": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "46": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "47": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "48": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "49": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "50": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "51": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "52": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "53": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "54": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "55": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "56": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "57": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + 
"moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "58": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "59": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + }, + "60": { + "attention": { + "comp_time": 52756.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 156530.0, + "comm_size": 21728.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws64-tp8-pp1-ep8-bs1-seq100-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws64-tp8-pp1-ep8-bs1-seq100-decode.json new file mode 100644 index 00000000..a358ce46 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws64-tp8-pp1-ep8-bs1-seq100-decode.json @@ -0,0 +1,612 @@ +{ + "0": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32399.0, + "comm_size": 14336.0 + } + }, + "1": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32399.0, + "comm_size": 14336.0 + } + }, + "2": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32399.0, + "comm_size": 14336.0 + } + }, + "3": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "4": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "5": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "6": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "7": { + "attention": { + 
"comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "8": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "9": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "10": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "11": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "12": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "13": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "14": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "15": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "16": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "17": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "18": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "19": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "20": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + 
"comm_size": 21728.0 + } + }, + "21": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "22": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "23": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "24": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "25": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "26": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "27": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "28": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "29": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "30": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "31": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "32": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "33": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "34": { + "attention": { + "comp_time": 52948.0, + 
"comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "35": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "36": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "37": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "38": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "39": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "40": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "41": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "42": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "43": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "44": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "45": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "46": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "47": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } 
+ }, + "48": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "49": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "50": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "51": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "52": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "53": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "54": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "55": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "56": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "57": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "58": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "59": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + }, + "60": { + "attention": { + "comp_time": 52948.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131920.0, + "comm_size": 21728.0 + } + } +} \ No newline at end of file diff --git 
a/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws64-tp8-pp1-ep8-bs1-seq106-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws64-tp8-pp1-ep8-bs1-seq106-decode.json new file mode 100644 index 00000000..752f265f --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-DeepSeek-671B-ws64-tp8-pp1-ep8-bs1-seq106-decode.json @@ -0,0 +1,612 @@ +{ + "0": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32355.0, + "comm_size": 14336.0 + } + }, + "1": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32355.0, + "comm_size": 14336.0 + } + }, + "2": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "mlp": { + "comp_time": 32355.0, + "comm_size": 14336.0 + } + }, + "3": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "4": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "5": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "6": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "7": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "8": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "9": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "10": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } 
+ }, + "11": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "12": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "13": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "14": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "15": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "16": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "17": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "18": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "19": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "20": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "21": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "22": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "23": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "24": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + 
"moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "25": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "26": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "27": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "28": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "29": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "30": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "31": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "32": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "33": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "34": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "35": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "36": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "37": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "38": { + 
"attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "39": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "40": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "41": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "42": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "43": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "44": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "45": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "46": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "47": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "48": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "49": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "50": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "51": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + 
"comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "52": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "53": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "54": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "55": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "56": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "57": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "58": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "59": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + }, + "60": { + "attention": { + "comp_time": 52742.0, + "comm_size": 14336.0 + }, + "moe": { + "comp_time": 131768.0, + "comm_size": 21728.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws24-tp4-pp1-ep24-bs1-seq100-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws24-tp4-pp1-ep24-bs1-seq100-decode.json new file mode 100644 index 00000000..621adc52 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws24-tp4-pp1-ep24-bs1-seq100-decode.json @@ -0,0 +1,942 @@ +{ + "0": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "1": { + "attention": { + 
"comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "2": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "3": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "4": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "5": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "6": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "7": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "8": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "9": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "10": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "11": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "12": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "13": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "14": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + 
"15": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "16": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "17": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "18": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "19": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "20": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "21": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "22": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "23": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "24": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "25": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "26": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "27": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "28": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + 
"comm_size": 24832.0 + } + }, + "29": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "30": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "31": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "32": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "33": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "34": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "35": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "36": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "37": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "38": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "39": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "40": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "41": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "42": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + 
"moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "43": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "44": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "45": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "46": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "47": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "48": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "49": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "50": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "51": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "52": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "53": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "54": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "55": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "56": { + "attention": { + "comp_time": 
35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "57": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "58": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "59": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "60": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "61": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "62": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "63": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "64": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "65": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "66": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "67": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "68": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "69": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "70": { + 
"attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "71": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "72": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "73": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "74": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "75": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "76": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "77": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "78": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "79": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "80": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "81": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "82": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "83": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + 
"comm_size": 24832.0 + } + }, + "84": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "85": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "86": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "87": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "88": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "89": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "90": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "91": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "92": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + }, + "93": { + "attention": { + "comp_time": 35721.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68728.0, + "comm_size": 24832.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws24-tp4-pp1-ep24-bs1-seq106-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws24-tp4-pp1-ep24-bs1-seq106-decode.json new file mode 100644 index 00000000..157eb09f --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws24-tp4-pp1-ep24-bs1-seq106-decode.json @@ -0,0 +1,942 @@ +{ + "0": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 
+ }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "1": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "2": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "3": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "4": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "5": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "6": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "7": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "8": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "9": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "10": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "11": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "12": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "13": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "14": { + "attention": { + "comp_time": 35604.0, 
+ "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "15": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "16": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "17": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "18": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "19": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "20": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "21": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "22": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "23": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "24": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "25": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "26": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "27": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "28": { + 
"attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "29": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "30": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "31": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "32": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "33": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "34": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "35": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "36": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "37": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "38": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "39": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "40": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "41": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + 
"comm_size": 24832.0 + } + }, + "42": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "43": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "44": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "45": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "46": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "47": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "48": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "49": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "50": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "51": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "52": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "53": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "54": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "55": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + 
"moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "56": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "57": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "58": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "59": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "60": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "61": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "62": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "63": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "64": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "65": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "66": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "67": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "68": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "69": { + "attention": { + "comp_time": 
35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "70": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "71": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "72": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "73": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "74": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "75": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "76": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "77": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "78": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "79": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "80": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "81": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "82": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "83": { + 
"attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "84": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "85": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "86": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "87": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "88": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "89": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "90": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "91": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "92": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + }, + "93": { + "attention": { + "comp_time": 35604.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 68773.0, + "comm_size": 24832.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws32-tp4-pp1-ep4-bs1-seq100-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws32-tp4-pp1-ep4-bs1-seq100-decode.json new file mode 100644 index 00000000..ad58223c --- /dev/null +++ 
b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws32-tp4-pp1-ep4-bs1-seq100-decode.json @@ -0,0 +1,942 @@ +{ + "0": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "1": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "2": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "3": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "4": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "5": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "6": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "7": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "8": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "9": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "10": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "11": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "12": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + 
}, + "13": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "14": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "15": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "16": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "17": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "18": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "19": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "20": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "21": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "22": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "23": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "24": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "25": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "26": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 
59069.0, + "comm_size": 24832.0 + } + }, + "27": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "28": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "29": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "30": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "31": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "32": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "33": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "34": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "35": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "36": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "37": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "38": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "39": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "40": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 
+ }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "41": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "42": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "43": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "44": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "45": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "46": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "47": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "48": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "49": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "50": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "51": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "52": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "53": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "54": { + "attention": { + "comp_time": 
35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "55": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "56": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "57": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "58": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "59": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "60": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "61": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "62": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "63": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "64": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "65": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "66": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "67": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "68": { + 
"attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "69": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "70": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "71": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "72": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "73": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "74": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "75": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "76": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "77": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "78": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "79": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "80": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "81": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + 
"comm_size": 24832.0 + } + }, + "82": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "83": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "84": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "85": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "86": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "87": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "88": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "89": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "90": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "91": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "92": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + }, + "93": { + "attention": { + "comp_time": 35698.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59069.0, + "comm_size": 24832.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws32-tp4-pp1-ep4-bs1-seq100-prefill.json 
b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws32-tp4-pp1-ep4-bs1-seq100-prefill.json new file mode 100644 index 00000000..99427d97 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws32-tp4-pp1-ep4-bs1-seq100-prefill.json @@ -0,0 +1,942 @@ +{ + "0": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "1": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "2": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "3": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "4": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "5": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "6": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "7": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "8": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "9": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "10": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "11": { + "attention": { + "comp_time": 44798.0, + 
"comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "12": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "13": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "14": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "15": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "16": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "17": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "18": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "19": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "20": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "21": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "22": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "23": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "24": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + 
"comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "25": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "26": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "27": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "28": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "29": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "30": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "31": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "32": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "33": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "34": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "35": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "36": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "37": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + 
} + }, + "38": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "39": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "40": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "41": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "42": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "43": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "44": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "45": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "46": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "47": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "48": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "49": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "50": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "51": { + "attention": { + "comp_time": 
44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "52": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "53": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "54": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "55": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "56": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "57": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "58": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "59": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "60": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "61": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "62": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "63": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "64": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { 
+ "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "65": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "66": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "67": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "68": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "69": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "70": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "71": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "72": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "73": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "74": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "75": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "76": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "77": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 
+ } + }, + "78": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "79": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "80": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "81": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "82": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "83": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "84": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "85": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "86": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "87": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "88": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "89": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "90": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "91": { + "attention": { + 
"comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "92": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + }, + "93": { + "attention": { + "comp_time": 44798.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 182516.0, + "comm_size": 2483200.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws32-tp4-pp1-ep4-bs1-seq106-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws32-tp4-pp1-ep4-bs1-seq106-decode.json new file mode 100644 index 00000000..3ca24de6 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws32-tp4-pp1-ep4-bs1-seq106-decode.json @@ -0,0 +1,942 @@ +{ + "0": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "1": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "2": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "3": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "4": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "5": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "6": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "7": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 
24832.0 + } + }, + "8": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "9": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "10": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "11": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "12": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "13": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "14": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "15": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "16": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "17": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "18": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "19": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "20": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "21": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + 
"comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "22": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "23": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "24": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "25": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "26": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "27": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "28": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "29": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "30": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "31": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "32": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "33": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "34": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "35": { + "attention": { + "comp_time": 35941.0, + 
"comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "36": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "37": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "38": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "39": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "40": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "41": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "42": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "43": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "44": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "45": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "46": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "47": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "48": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "49": { + 
"attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "50": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "51": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "52": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "53": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "54": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "55": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "56": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "57": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "58": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "59": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "60": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "61": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "62": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + 
"comm_size": 24832.0 + } + }, + "63": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "64": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "65": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "66": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "67": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "68": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "69": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "70": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "71": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "72": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "73": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "74": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "75": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "76": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + 
"moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "77": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "78": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "79": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "80": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "81": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "82": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "83": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "84": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "85": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "86": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "87": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "88": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "89": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "90": { + "attention": { + "comp_time": 
35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "91": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "92": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + }, + "93": { + "attention": { + "comp_time": 35941.0, + "comm_size": 8192.0 + }, + "moe": { + "comp_time": 59133.0, + "comm_size": 24832.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws8-tp4-pp1-ep8-bs1-seq100-prefill.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws8-tp4-pp1-ep8-bs1-seq100-prefill.json new file mode 100644 index 00000000..85363161 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Moe-235B-ws8-tp4-pp1-ep8-bs1-seq100-prefill.json @@ -0,0 +1,942 @@ +{ + "0": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "1": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "2": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "3": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "4": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "5": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "6": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + 
"comm_size": 2483200.0 + } + }, + "7": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "8": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "9": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "10": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "11": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "12": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "13": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "14": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "15": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "16": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "17": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "18": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "19": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "20": { + 
"attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "21": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "22": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "23": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "24": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "25": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "26": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "27": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "28": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "29": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "30": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "31": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "32": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "33": { + "attention": { + "comp_time": 44945.0, + 
"comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "34": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "35": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "36": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "37": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "38": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "39": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "40": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "41": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "42": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "43": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "44": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "45": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "46": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + 
"comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "47": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "48": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "49": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "50": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "51": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "52": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "53": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "54": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "55": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "56": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "57": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "58": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "59": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + 
} + }, + "60": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "61": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "62": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "63": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "64": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "65": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "66": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "67": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "68": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "69": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "70": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "71": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "72": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "73": { + "attention": { + "comp_time": 
44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "74": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "75": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "76": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "77": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "78": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "79": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "80": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "81": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "82": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "83": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "84": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "85": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "86": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { 
+ "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "87": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "88": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "89": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "90": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "91": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "92": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + }, + "93": { + "attention": { + "comp_time": 44945.0, + "comm_size": 819200.0 + }, + "moe": { + "comp_time": 197980.0, + "comm_size": 2483200.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws2-tp1-pp1-ep2-bs1-seq100-prefill.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws2-tp1-pp1-ep2-bs1-seq100-prefill.json new file mode 100644 index 00000000..0f7bf666 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws2-tp1-pp1-ep2-bs1-seq100-prefill.json @@ -0,0 +1,482 @@ +{ + "0": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "1": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "2": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "3": { + 
"attention": { + "comp_time": 53624.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "4": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "5": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "6": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "7": { + "attention": { + "comp_time": 53624.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "8": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "9": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "10": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "11": { + "attention": { + "comp_time": 53624.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "12": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "13": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "14": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "15": { + "attention": { + "comp_time": 53624.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "16": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 
6208000.0 + } + }, + "17": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "18": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "19": { + "attention": { + "comp_time": 53624.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "20": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "21": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "22": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "23": { + "attention": { + "comp_time": 53624.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "24": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "25": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "26": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "27": { + "attention": { + "comp_time": 53624.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "28": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "29": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "30": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + 
"comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "31": { + "attention": { + "comp_time": 53624.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "32": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "33": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "34": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "35": { + "attention": { + "comp_time": 53624.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "36": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "37": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "38": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "39": { + "attention": { + "comp_time": 53624.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "40": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "41": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "42": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "43": { + "attention": { + "comp_time": 53624.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "44": { + "attention": { + "comp_time": 45585.0, + 
"comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "45": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "46": { + "attention": { + "comp_time": 45585.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + }, + "47": { + "attention": { + "comp_time": 53624.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 496646.0, + "comm_size": 6208000.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws32-tp1-pp1-ep32-bs0-seq0-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws32-tp1-pp1-ep32-bs0-seq0-decode.json new file mode 100644 index 00000000..d402dd6d --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws32-tp1-pp1-ep32-bs0-seq0-decode.json @@ -0,0 +1,482 @@ +{ + "0": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "1": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "2": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "3": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "4": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "5": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "6": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "7": { + "attention": { + "comp_time": 
42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "8": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "9": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "10": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "11": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "12": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "13": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "14": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "15": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "16": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "17": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "18": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "19": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "20": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "21": { + "attention": { + "comp_time": 26828.0, + 
"comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "22": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "23": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "24": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "25": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "26": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "27": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "28": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "29": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "30": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "31": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "32": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "33": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "34": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "35": { + "attention": { + "comp_time": 42077.0, + 
"comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "36": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "37": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "38": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "39": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "40": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "41": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "42": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "43": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "44": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "45": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "46": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "47": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws32-tp1-pp1-ep32-bs1-seq100-decode.json 
b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws32-tp1-pp1-ep32-bs1-seq100-decode.json new file mode 100644 index 00000000..d402dd6d --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws32-tp1-pp1-ep32-bs1-seq100-decode.json @@ -0,0 +1,482 @@ +{ + "0": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "1": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "2": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "3": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "4": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "5": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "6": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "7": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "8": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "9": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "10": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "11": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + 
"12": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "13": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "14": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "15": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "16": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "17": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "18": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "19": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "20": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "21": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "22": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "23": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "24": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "25": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "26": { + 
"attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "27": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "28": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "29": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "30": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "31": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "32": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "33": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "34": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "35": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "36": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "37": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "38": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "39": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "40": { + 
"attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "41": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "42": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "43": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "44": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "45": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "46": { + "attention": { + "comp_time": 26828.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + }, + "47": { + "attention": { + "comp_time": 42077.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47850.0, + "comm_size": 62080.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws32-tp1-pp1-ep32-bs1-seq106-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws32-tp1-pp1-ep32-bs1-seq106-decode.json new file mode 100644 index 00000000..f04770eb --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws32-tp1-pp1-ep32-bs1-seq106-decode.json @@ -0,0 +1,482 @@ +{ + "0": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "1": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "2": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + 
"3": { + "attention": { + "comp_time": 41999.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "4": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "5": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "6": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "7": { + "attention": { + "comp_time": 41999.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "8": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "9": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "10": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "11": { + "attention": { + "comp_time": 41999.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "12": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "13": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "14": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "15": { + "attention": { + "comp_time": 41999.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "16": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "17": { + 
"attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "18": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "19": { + "attention": { + "comp_time": 41999.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "20": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "21": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "22": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "23": { + "attention": { + "comp_time": 41999.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "24": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "25": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "26": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "27": { + "attention": { + "comp_time": 41999.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "28": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "29": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "30": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "31": { + 
"attention": { + "comp_time": 41999.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "32": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "33": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "34": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "35": { + "attention": { + "comp_time": 41999.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "36": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "37": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "38": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "39": { + "attention": { + "comp_time": 41999.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "40": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "41": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "42": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "43": { + "attention": { + "comp_time": 41999.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "44": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "45": { + 
"attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "46": { + "attention": { + "comp_time": 27045.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + }, + "47": { + "attention": { + "comp_time": 41999.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 47979.0, + "comm_size": 62080.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws6-tp1-pp1-ep6-bs1-seq100-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws6-tp1-pp1-ep6-bs1-seq100-decode.json new file mode 100644 index 00000000..89ba9ead --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws6-tp1-pp1-ep6-bs1-seq100-decode.json @@ -0,0 +1,482 @@ +{ + "0": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "1": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "2": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "3": { + "attention": { + "comp_time": 41834.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "4": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "5": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "6": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "7": { + "attention": { + "comp_time": 41834.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "8": { 
+ "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "9": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "10": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "11": { + "attention": { + "comp_time": 41834.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "12": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "13": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "14": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "15": { + "attention": { + "comp_time": 41834.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "16": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "17": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "18": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "19": { + "attention": { + "comp_time": 41834.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "20": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "21": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "22": 
{ + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "23": { + "attention": { + "comp_time": 41834.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "24": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "25": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "26": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "27": { + "attention": { + "comp_time": 41834.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "28": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "29": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "30": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "31": { + "attention": { + "comp_time": 41834.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "32": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "33": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "34": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "35": { + "attention": { + "comp_time": 41834.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + 
"36": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "37": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "38": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "39": { + "attention": { + "comp_time": 41834.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "40": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "41": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "42": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "43": { + "attention": { + "comp_time": 41834.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "44": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "45": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "46": { + "attention": { + "comp_time": 26930.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + }, + "47": { + "attention": { + "comp_time": 41834.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132351.0, + "comm_size": 62080.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws6-tp1-pp1-ep6-bs1-seq106-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws6-tp1-pp1-ep6-bs1-seq106-decode.json new file mode 100644 index 
00000000..1cdd7d98 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws6-tp1-pp1-ep6-bs1-seq106-decode.json @@ -0,0 +1,482 @@ +{ + "0": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "1": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "2": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "3": { + "attention": { + "comp_time": 41884.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "4": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "5": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "6": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "7": { + "attention": { + "comp_time": 41884.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "8": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "9": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "10": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "11": { + "attention": { + "comp_time": 41884.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "12": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 
62080.0 + } + }, + "13": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "14": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "15": { + "attention": { + "comp_time": 41884.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "16": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "17": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "18": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "19": { + "attention": { + "comp_time": 41884.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "20": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "21": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "22": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "23": { + "attention": { + "comp_time": 41884.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "24": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "25": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "26": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + 
"comm_size": 62080.0 + } + }, + "27": { + "attention": { + "comp_time": 41884.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "28": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "29": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "30": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "31": { + "attention": { + "comp_time": 41884.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "32": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "33": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "34": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "35": { + "attention": { + "comp_time": 41884.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "36": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "37": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "38": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "39": { + "attention": { + "comp_time": 41884.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "40": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, 
+ "comm_size": 62080.0 + } + }, + "41": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "42": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "43": { + "attention": { + "comp_time": 41884.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "44": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "45": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "46": { + "attention": { + "comp_time": 26979.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + }, + "47": { + "attention": { + "comp_time": 41884.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 132058.0, + "comm_size": 62080.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws8-tp1-pp1-ep8-bs1-seq100-decode.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws8-tp1-pp1-ep8-bs1-seq100-decode.json new file mode 100644 index 00000000..2097a6b9 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws8-tp1-pp1-ep8-bs1-seq100-decode.json @@ -0,0 +1,482 @@ +{ + "0": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "1": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "2": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "3": { + "attention": { + "comp_time": 42127.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 
113280.0, + "comm_size": 62080.0 + } + }, + "4": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "5": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "6": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "7": { + "attention": { + "comp_time": 42127.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "8": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "9": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "10": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "11": { + "attention": { + "comp_time": 42127.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "12": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "13": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "14": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "15": { + "attention": { + "comp_time": 42127.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "16": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "17": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 
113280.0, + "comm_size": 62080.0 + } + }, + "18": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "19": { + "attention": { + "comp_time": 42127.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "20": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "21": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "22": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "23": { + "attention": { + "comp_time": 42127.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "24": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "25": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "26": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "27": { + "attention": { + "comp_time": 42127.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "28": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "29": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "30": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "31": { + "attention": { + "comp_time": 42127.0, + "comm_size": 0.0 + }, + "moe": { + 
"comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "32": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "33": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "34": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "35": { + "attention": { + "comp_time": 42127.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "36": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "37": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "38": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "39": { + "attention": { + "comp_time": 42127.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "40": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "41": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "42": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "43": { + "attention": { + "comp_time": 42127.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "44": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "45": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { 
+ "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "46": { + "attention": { + "comp_time": 26734.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + }, + "47": { + "attention": { + "comp_time": 42127.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113280.0, + "comm_size": 62080.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws8-tp1-pp1-ep8-bs1-seq100-prefill.json b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws8-tp1-pp1-ep8-bs1-seq100-prefill.json new file mode 100644 index 00000000..a4ae0690 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws8-tp1-pp1-ep8-bs1-seq100-prefill.json @@ -0,0 +1,482 @@ +{ + "0": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "1": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "2": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "3": { + "attention": { + "comp_time": 53616.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "4": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "5": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "6": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "7": { + "attention": { + "comp_time": 53616.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "8": { + "attention": { + "comp_time": 45839.0, + 
"comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "9": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "10": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "11": { + "attention": { + "comp_time": 53616.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "12": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "13": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "14": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "15": { + "attention": { + "comp_time": 53616.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "16": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "17": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "18": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "19": { + "attention": { + "comp_time": 53616.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "20": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "21": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "22": { + 
"attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "23": { + "attention": { + "comp_time": 53616.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "24": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "25": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "26": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "27": { + "attention": { + "comp_time": 53616.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "28": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "29": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "30": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "31": { + "attention": { + "comp_time": 53616.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "32": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "33": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "34": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "35": { + "attention": { + "comp_time": 53616.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 
6208000.0 + } + }, + "36": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "37": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "38": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "39": { + "attention": { + "comp_time": 53616.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "40": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "41": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "42": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "43": { + "attention": { + "comp_time": 53616.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "44": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "45": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "46": { + "attention": { + "comp_time": 45839.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + }, + "47": { + "attention": { + "comp_time": 53616.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 146500.0, + "comm_size": 6208000.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws8-tp1-pp1-ep8-bs1-seq106-decode.json 
b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws8-tp1-pp1-ep8-bs1-seq106-decode.json new file mode 100644 index 00000000..3a443ef0 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/aicb-Qwen3-Next-80B-ws8-tp1-pp1-ep8-bs1-seq106-decode.json @@ -0,0 +1,482 @@ +{ + "0": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "1": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "2": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "3": { + "attention": { + "comp_time": 42109.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "4": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "5": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "6": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "7": { + "attention": { + "comp_time": 42109.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "8": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "9": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "10": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "11": { + "attention": { + "comp_time": 42109.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + 
}, + "12": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "13": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "14": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "15": { + "attention": { + "comp_time": 42109.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "16": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "17": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "18": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "19": { + "attention": { + "comp_time": 42109.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "20": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "21": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "22": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "23": { + "attention": { + "comp_time": 42109.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "24": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "25": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } 
+ }, + "26": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "27": { + "attention": { + "comp_time": 42109.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "28": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "29": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "30": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "31": { + "attention": { + "comp_time": 42109.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "32": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "33": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "34": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "35": { + "attention": { + "comp_time": 42109.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "36": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "37": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "38": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "39": { + "attention": { + "comp_time": 42109.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + 
} + }, + "40": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "41": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "42": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "43": { + "attention": { + "comp_time": 42109.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "44": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "45": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "46": { + "attention": { + "comp_time": 26914.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + }, + "47": { + "attention": { + "comp_time": 42109.0, + "comm_size": 0.0 + }, + "moe": { + "comp_time": 113099.0, + "comm_size": 62080.0 + } + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-DeepSeek-671B-world_size48-tp8-pp1-ep48-bs1-seq100-decode.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-DeepSeek-671B-world_size48-tp8-pp1-ep48-bs1-seq100-decode.csv new file mode 100644 index 00000000..300fcf50 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-DeepSeek-671B-world_size48-tp8-pp1-ep48-bs1-seq100-decode.csv @@ -0,0 +1,123 @@ +layer_id layer_name comp_time comm_size +0 attention 53006 14336 +0 mlp 32216 14336 +1 attention 53006 14336 +1 mlp 32216 14336 +2 attention 53006 14336 +2 mlp 32216 14336 +3 attention 53006 14336 +3 moe 156560 21728 +4 attention 53006 14336 +4 moe 156560 21728 +5 attention 53006 14336 +5 moe 156560 21728 +6 attention 53006 14336 +6 moe 156560 21728 +7 attention 53006 14336 +7 
moe 156560 21728 +8 attention 53006 14336 +8 moe 156560 21728 +9 attention 53006 14336 +9 moe 156560 21728 +10 attention 53006 14336 +10 moe 156560 21728 +11 attention 53006 14336 +11 moe 156560 21728 +12 attention 53006 14336 +12 moe 156560 21728 +13 attention 53006 14336 +13 moe 156560 21728 +14 attention 53006 14336 +14 moe 156560 21728 +15 attention 53006 14336 +15 moe 156560 21728 +16 attention 53006 14336 +16 moe 156560 21728 +17 attention 53006 14336 +17 moe 156560 21728 +18 attention 53006 14336 +18 moe 156560 21728 +19 attention 53006 14336 +19 moe 156560 21728 +20 attention 53006 14336 +20 moe 156560 21728 +21 attention 53006 14336 +21 moe 156560 21728 +22 attention 53006 14336 +22 moe 156560 21728 +23 attention 53006 14336 +23 moe 156560 21728 +24 attention 53006 14336 +24 moe 156560 21728 +25 attention 53006 14336 +25 moe 156560 21728 +26 attention 53006 14336 +26 moe 156560 21728 +27 attention 53006 14336 +27 moe 156560 21728 +28 attention 53006 14336 +28 moe 156560 21728 +29 attention 53006 14336 +29 moe 156560 21728 +30 attention 53006 14336 +30 moe 156560 21728 +31 attention 53006 14336 +31 moe 156560 21728 +32 attention 53006 14336 +32 moe 156560 21728 +33 attention 53006 14336 +33 moe 156560 21728 +34 attention 53006 14336 +34 moe 156560 21728 +35 attention 53006 14336 +35 moe 156560 21728 +36 attention 53006 14336 +36 moe 156560 21728 +37 attention 53006 14336 +37 moe 156560 21728 +38 attention 53006 14336 +38 moe 156560 21728 +39 attention 53006 14336 +39 moe 156560 21728 +40 attention 53006 14336 +40 moe 156560 21728 +41 attention 53006 14336 +41 moe 156560 21728 +42 attention 53006 14336 +42 moe 156560 21728 +43 attention 53006 14336 +43 moe 156560 21728 +44 attention 53006 14336 +44 moe 156560 21728 +45 attention 53006 14336 +45 moe 156560 21728 +46 attention 53006 14336 +46 moe 156560 21728 +47 attention 53006 14336 +47 moe 156560 21728 +48 attention 53006 14336 +48 moe 156560 21728 +49 attention 53006 14336 +49 moe 156560 21728 +50 
attention 53006 14336 +50 moe 156560 21728 +51 attention 53006 14336 +51 moe 156560 21728 +52 attention 53006 14336 +52 moe 156560 21728 +53 attention 53006 14336 +53 moe 156560 21728 +54 attention 53006 14336 +54 moe 156560 21728 +55 attention 53006 14336 +55 moe 156560 21728 +56 attention 53006 14336 +56 moe 156560 21728 +57 attention 53006 14336 +57 moe 156560 21728 +58 attention 53006 14336 +58 moe 156560 21728 +59 attention 53006 14336 +59 moe 156560 21728 +60 attention 53006 14336 +60 moe 156560 21728 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-DeepSeek-671B-world_size48-tp8-pp1-ep48-bs1-seq106-decode.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-DeepSeek-671B-world_size48-tp8-pp1-ep48-bs1-seq106-decode.csv new file mode 100644 index 00000000..7e313039 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-DeepSeek-671B-world_size48-tp8-pp1-ep48-bs1-seq106-decode.csv @@ -0,0 +1,123 @@ +layer_id layer_name comp_time comm_size +0 attention 52756 14336 +0 mlp 32274 14336 +1 attention 52756 14336 +1 mlp 32274 14336 +2 attention 52756 14336 +2 mlp 32274 14336 +3 attention 52756 14336 +3 moe 156530 21728 +4 attention 52756 14336 +4 moe 156530 21728 +5 attention 52756 14336 +5 moe 156530 21728 +6 attention 52756 14336 +6 moe 156530 21728 +7 attention 52756 14336 +7 moe 156530 21728 +8 attention 52756 14336 +8 moe 156530 21728 +9 attention 52756 14336 +9 moe 156530 21728 +10 attention 52756 14336 +10 moe 156530 21728 +11 attention 52756 14336 +11 moe 156530 21728 +12 attention 52756 14336 +12 moe 156530 21728 +13 attention 52756 14336 +13 moe 156530 21728 +14 attention 52756 14336 +14 moe 156530 21728 +15 attention 52756 14336 +15 moe 156530 21728 +16 attention 52756 14336 +16 moe 156530 21728 +17 attention 52756 14336 +17 moe 156530 21728 +18 attention 52756 14336 +18 moe 156530 21728 +19 attention 52756 14336 +19 moe 156530 21728 +20 attention 52756 14336 +20 moe 156530 21728 +21 attention 52756 14336 +21 moe 156530 21728 
+22 attention 52756 14336 +22 moe 156530 21728 +23 attention 52756 14336 +23 moe 156530 21728 +24 attention 52756 14336 +24 moe 156530 21728 +25 attention 52756 14336 +25 moe 156530 21728 +26 attention 52756 14336 +26 moe 156530 21728 +27 attention 52756 14336 +27 moe 156530 21728 +28 attention 52756 14336 +28 moe 156530 21728 +29 attention 52756 14336 +29 moe 156530 21728 +30 attention 52756 14336 +30 moe 156530 21728 +31 attention 52756 14336 +31 moe 156530 21728 +32 attention 52756 14336 +32 moe 156530 21728 +33 attention 52756 14336 +33 moe 156530 21728 +34 attention 52756 14336 +34 moe 156530 21728 +35 attention 52756 14336 +35 moe 156530 21728 +36 attention 52756 14336 +36 moe 156530 21728 +37 attention 52756 14336 +37 moe 156530 21728 +38 attention 52756 14336 +38 moe 156530 21728 +39 attention 52756 14336 +39 moe 156530 21728 +40 attention 52756 14336 +40 moe 156530 21728 +41 attention 52756 14336 +41 moe 156530 21728 +42 attention 52756 14336 +42 moe 156530 21728 +43 attention 52756 14336 +43 moe 156530 21728 +44 attention 52756 14336 +44 moe 156530 21728 +45 attention 52756 14336 +45 moe 156530 21728 +46 attention 52756 14336 +46 moe 156530 21728 +47 attention 52756 14336 +47 moe 156530 21728 +48 attention 52756 14336 +48 moe 156530 21728 +49 attention 52756 14336 +49 moe 156530 21728 +50 attention 52756 14336 +50 moe 156530 21728 +51 attention 52756 14336 +51 moe 156530 21728 +52 attention 52756 14336 +52 moe 156530 21728 +53 attention 52756 14336 +53 moe 156530 21728 +54 attention 52756 14336 +54 moe 156530 21728 +55 attention 52756 14336 +55 moe 156530 21728 +56 attention 52756 14336 +56 moe 156530 21728 +57 attention 52756 14336 +57 moe 156530 21728 +58 attention 52756 14336 +58 moe 156530 21728 +59 attention 52756 14336 +59 moe 156530 21728 +60 attention 52756 14336 +60 moe 156530 21728 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-DeepSeek-671B-world_size64-tp8-pp1-ep8-bs1-seq100-decode.csv 
b/vidur-alibabacloud/data/aicb_workload/cache/vidur-DeepSeek-671B-world_size64-tp8-pp1-ep8-bs1-seq100-decode.csv new file mode 100644 index 00000000..18f33a8f --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-DeepSeek-671B-world_size64-tp8-pp1-ep8-bs1-seq100-decode.csv @@ -0,0 +1,123 @@ +layer_id layer_name comp_time comm_size +0 attention 52948 14336 +0 mlp 32399 14336 +1 attention 52948 14336 +1 mlp 32399 14336 +2 attention 52948 14336 +2 mlp 32399 14336 +3 attention 52948 14336 +3 moe 131920 21728 +4 attention 52948 14336 +4 moe 131920 21728 +5 attention 52948 14336 +5 moe 131920 21728 +6 attention 52948 14336 +6 moe 131920 21728 +7 attention 52948 14336 +7 moe 131920 21728 +8 attention 52948 14336 +8 moe 131920 21728 +9 attention 52948 14336 +9 moe 131920 21728 +10 attention 52948 14336 +10 moe 131920 21728 +11 attention 52948 14336 +11 moe 131920 21728 +12 attention 52948 14336 +12 moe 131920 21728 +13 attention 52948 14336 +13 moe 131920 21728 +14 attention 52948 14336 +14 moe 131920 21728 +15 attention 52948 14336 +15 moe 131920 21728 +16 attention 52948 14336 +16 moe 131920 21728 +17 attention 52948 14336 +17 moe 131920 21728 +18 attention 52948 14336 +18 moe 131920 21728 +19 attention 52948 14336 +19 moe 131920 21728 +20 attention 52948 14336 +20 moe 131920 21728 +21 attention 52948 14336 +21 moe 131920 21728 +22 attention 52948 14336 +22 moe 131920 21728 +23 attention 52948 14336 +23 moe 131920 21728 +24 attention 52948 14336 +24 moe 131920 21728 +25 attention 52948 14336 +25 moe 131920 21728 +26 attention 52948 14336 +26 moe 131920 21728 +27 attention 52948 14336 +27 moe 131920 21728 +28 attention 52948 14336 +28 moe 131920 21728 +29 attention 52948 14336 +29 moe 131920 21728 +30 attention 52948 14336 +30 moe 131920 21728 +31 attention 52948 14336 +31 moe 131920 21728 +32 attention 52948 14336 +32 moe 131920 21728 +33 attention 52948 14336 +33 moe 131920 21728 +34 attention 52948 14336 +34 moe 131920 21728 +35 attention 52948 14336 
+35 moe 131920 21728 +36 attention 52948 14336 +36 moe 131920 21728 +37 attention 52948 14336 +37 moe 131920 21728 +38 attention 52948 14336 +38 moe 131920 21728 +39 attention 52948 14336 +39 moe 131920 21728 +40 attention 52948 14336 +40 moe 131920 21728 +41 attention 52948 14336 +41 moe 131920 21728 +42 attention 52948 14336 +42 moe 131920 21728 +43 attention 52948 14336 +43 moe 131920 21728 +44 attention 52948 14336 +44 moe 131920 21728 +45 attention 52948 14336 +45 moe 131920 21728 +46 attention 52948 14336 +46 moe 131920 21728 +47 attention 52948 14336 +47 moe 131920 21728 +48 attention 52948 14336 +48 moe 131920 21728 +49 attention 52948 14336 +49 moe 131920 21728 +50 attention 52948 14336 +50 moe 131920 21728 +51 attention 52948 14336 +51 moe 131920 21728 +52 attention 52948 14336 +52 moe 131920 21728 +53 attention 52948 14336 +53 moe 131920 21728 +54 attention 52948 14336 +54 moe 131920 21728 +55 attention 52948 14336 +55 moe 131920 21728 +56 attention 52948 14336 +56 moe 131920 21728 +57 attention 52948 14336 +57 moe 131920 21728 +58 attention 52948 14336 +58 moe 131920 21728 +59 attention 52948 14336 +59 moe 131920 21728 +60 attention 52948 14336 +60 moe 131920 21728 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-DeepSeek-671B-world_size64-tp8-pp1-ep8-bs1-seq106-decode.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-DeepSeek-671B-world_size64-tp8-pp1-ep8-bs1-seq106-decode.csv new file mode 100644 index 00000000..a8e3c3fc --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-DeepSeek-671B-world_size64-tp8-pp1-ep8-bs1-seq106-decode.csv @@ -0,0 +1,123 @@ +layer_id layer_name comp_time comm_size +0 attention 52742 14336 +0 mlp 32355 14336 +1 attention 52742 14336 +1 mlp 32355 14336 +2 attention 52742 14336 +2 mlp 32355 14336 +3 attention 52742 14336 +3 moe 131768 21728 +4 attention 52742 14336 +4 moe 131768 21728 +5 attention 52742 14336 +5 moe 131768 21728 +6 attention 52742 14336 +6 moe 131768 21728 +7 attention 52742 
14336 +7 moe 131768 21728 +8 attention 52742 14336 +8 moe 131768 21728 +9 attention 52742 14336 +9 moe 131768 21728 +10 attention 52742 14336 +10 moe 131768 21728 +11 attention 52742 14336 +11 moe 131768 21728 +12 attention 52742 14336 +12 moe 131768 21728 +13 attention 52742 14336 +13 moe 131768 21728 +14 attention 52742 14336 +14 moe 131768 21728 +15 attention 52742 14336 +15 moe 131768 21728 +16 attention 52742 14336 +16 moe 131768 21728 +17 attention 52742 14336 +17 moe 131768 21728 +18 attention 52742 14336 +18 moe 131768 21728 +19 attention 52742 14336 +19 moe 131768 21728 +20 attention 52742 14336 +20 moe 131768 21728 +21 attention 52742 14336 +21 moe 131768 21728 +22 attention 52742 14336 +22 moe 131768 21728 +23 attention 52742 14336 +23 moe 131768 21728 +24 attention 52742 14336 +24 moe 131768 21728 +25 attention 52742 14336 +25 moe 131768 21728 +26 attention 52742 14336 +26 moe 131768 21728 +27 attention 52742 14336 +27 moe 131768 21728 +28 attention 52742 14336 +28 moe 131768 21728 +29 attention 52742 14336 +29 moe 131768 21728 +30 attention 52742 14336 +30 moe 131768 21728 +31 attention 52742 14336 +31 moe 131768 21728 +32 attention 52742 14336 +32 moe 131768 21728 +33 attention 52742 14336 +33 moe 131768 21728 +34 attention 52742 14336 +34 moe 131768 21728 +35 attention 52742 14336 +35 moe 131768 21728 +36 attention 52742 14336 +36 moe 131768 21728 +37 attention 52742 14336 +37 moe 131768 21728 +38 attention 52742 14336 +38 moe 131768 21728 +39 attention 52742 14336 +39 moe 131768 21728 +40 attention 52742 14336 +40 moe 131768 21728 +41 attention 52742 14336 +41 moe 131768 21728 +42 attention 52742 14336 +42 moe 131768 21728 +43 attention 52742 14336 +43 moe 131768 21728 +44 attention 52742 14336 +44 moe 131768 21728 +45 attention 52742 14336 +45 moe 131768 21728 +46 attention 52742 14336 +46 moe 131768 21728 +47 attention 52742 14336 +47 moe 131768 21728 +48 attention 52742 14336 +48 moe 131768 21728 +49 attention 52742 14336 +49 moe 131768 21728 +50 
attention 52742 14336 +50 moe 131768 21728 +51 attention 52742 14336 +51 moe 131768 21728 +52 attention 52742 14336 +52 moe 131768 21728 +53 attention 52742 14336 +53 moe 131768 21728 +54 attention 52742 14336 +54 moe 131768 21728 +55 attention 52742 14336 +55 moe 131768 21728 +56 attention 52742 14336 +56 moe 131768 21728 +57 attention 52742 14336 +57 moe 131768 21728 +58 attention 52742 14336 +58 moe 131768 21728 +59 attention 52742 14336 +59 moe 131768 21728 +60 attention 52742 14336 +60 moe 131768 21728 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size24-tp4-pp1-ep24-bs1-seq100-decode.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size24-tp4-pp1-ep24-bs1-seq100-decode.csv new file mode 100644 index 00000000..40462a70 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size24-tp4-pp1-ep24-bs1-seq100-decode.csv @@ -0,0 +1,189 @@ +layer_id layer_name comp_time comm_size +0 attention 35721 8192 +0 moe 68728 24832 +1 attention 35721 8192 +1 moe 68728 24832 +2 attention 35721 8192 +2 moe 68728 24832 +3 attention 35721 8192 +3 moe 68728 24832 +4 attention 35721 8192 +4 moe 68728 24832 +5 attention 35721 8192 +5 moe 68728 24832 +6 attention 35721 8192 +6 moe 68728 24832 +7 attention 35721 8192 +7 moe 68728 24832 +8 attention 35721 8192 +8 moe 68728 24832 +9 attention 35721 8192 +9 moe 68728 24832 +10 attention 35721 8192 +10 moe 68728 24832 +11 attention 35721 8192 +11 moe 68728 24832 +12 attention 35721 8192 +12 moe 68728 24832 +13 attention 35721 8192 +13 moe 68728 24832 +14 attention 35721 8192 +14 moe 68728 24832 +15 attention 35721 8192 +15 moe 68728 24832 +16 attention 35721 8192 +16 moe 68728 24832 +17 attention 35721 8192 +17 moe 68728 24832 +18 attention 35721 8192 +18 moe 68728 24832 +19 attention 35721 8192 +19 moe 68728 24832 +20 attention 35721 8192 +20 moe 68728 24832 +21 attention 35721 8192 +21 moe 68728 24832 +22 attention 35721 8192 +22 moe 
68728 24832 +23 attention 35721 8192 +23 moe 68728 24832 +24 attention 35721 8192 +24 moe 68728 24832 +25 attention 35721 8192 +25 moe 68728 24832 +26 attention 35721 8192 +26 moe 68728 24832 +27 attention 35721 8192 +27 moe 68728 24832 +28 attention 35721 8192 +28 moe 68728 24832 +29 attention 35721 8192 +29 moe 68728 24832 +30 attention 35721 8192 +30 moe 68728 24832 +31 attention 35721 8192 +31 moe 68728 24832 +32 attention 35721 8192 +32 moe 68728 24832 +33 attention 35721 8192 +33 moe 68728 24832 +34 attention 35721 8192 +34 moe 68728 24832 +35 attention 35721 8192 +35 moe 68728 24832 +36 attention 35721 8192 +36 moe 68728 24832 +37 attention 35721 8192 +37 moe 68728 24832 +38 attention 35721 8192 +38 moe 68728 24832 +39 attention 35721 8192 +39 moe 68728 24832 +40 attention 35721 8192 +40 moe 68728 24832 +41 attention 35721 8192 +41 moe 68728 24832 +42 attention 35721 8192 +42 moe 68728 24832 +43 attention 35721 8192 +43 moe 68728 24832 +44 attention 35721 8192 +44 moe 68728 24832 +45 attention 35721 8192 +45 moe 68728 24832 +46 attention 35721 8192 +46 moe 68728 24832 +47 attention 35721 8192 +47 moe 68728 24832 +48 attention 35721 8192 +48 moe 68728 24832 +49 attention 35721 8192 +49 moe 68728 24832 +50 attention 35721 8192 +50 moe 68728 24832 +51 attention 35721 8192 +51 moe 68728 24832 +52 attention 35721 8192 +52 moe 68728 24832 +53 attention 35721 8192 +53 moe 68728 24832 +54 attention 35721 8192 +54 moe 68728 24832 +55 attention 35721 8192 +55 moe 68728 24832 +56 attention 35721 8192 +56 moe 68728 24832 +57 attention 35721 8192 +57 moe 68728 24832 +58 attention 35721 8192 +58 moe 68728 24832 +59 attention 35721 8192 +59 moe 68728 24832 +60 attention 35721 8192 +60 moe 68728 24832 +61 attention 35721 8192 +61 moe 68728 24832 +62 attention 35721 8192 +62 moe 68728 24832 +63 attention 35721 8192 +63 moe 68728 24832 +64 attention 35721 8192 +64 moe 68728 24832 +65 attention 35721 8192 +65 moe 68728 24832 +66 attention 35721 8192 +66 moe 68728 24832 +67 
attention 35721 8192 +67 moe 68728 24832 +68 attention 35721 8192 +68 moe 68728 24832 +69 attention 35721 8192 +69 moe 68728 24832 +70 attention 35721 8192 +70 moe 68728 24832 +71 attention 35721 8192 +71 moe 68728 24832 +72 attention 35721 8192 +72 moe 68728 24832 +73 attention 35721 8192 +73 moe 68728 24832 +74 attention 35721 8192 +74 moe 68728 24832 +75 attention 35721 8192 +75 moe 68728 24832 +76 attention 35721 8192 +76 moe 68728 24832 +77 attention 35721 8192 +77 moe 68728 24832 +78 attention 35721 8192 +78 moe 68728 24832 +79 attention 35721 8192 +79 moe 68728 24832 +80 attention 35721 8192 +80 moe 68728 24832 +81 attention 35721 8192 +81 moe 68728 24832 +82 attention 35721 8192 +82 moe 68728 24832 +83 attention 35721 8192 +83 moe 68728 24832 +84 attention 35721 8192 +84 moe 68728 24832 +85 attention 35721 8192 +85 moe 68728 24832 +86 attention 35721 8192 +86 moe 68728 24832 +87 attention 35721 8192 +87 moe 68728 24832 +88 attention 35721 8192 +88 moe 68728 24832 +89 attention 35721 8192 +89 moe 68728 24832 +90 attention 35721 8192 +90 moe 68728 24832 +91 attention 35721 8192 +91 moe 68728 24832 +92 attention 35721 8192 +92 moe 68728 24832 +93 attention 35721 8192 +93 moe 68728 24832 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size24-tp4-pp1-ep24-bs1-seq106-decode.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size24-tp4-pp1-ep24-bs1-seq106-decode.csv new file mode 100644 index 00000000..264b6177 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size24-tp4-pp1-ep24-bs1-seq106-decode.csv @@ -0,0 +1,189 @@ +layer_id layer_name comp_time comm_size +0 attention 35604 8192 +0 moe 68773 24832 +1 attention 35604 8192 +1 moe 68773 24832 +2 attention 35604 8192 +2 moe 68773 24832 +3 attention 35604 8192 +3 moe 68773 24832 +4 attention 35604 8192 +4 moe 68773 24832 +5 attention 35604 8192 +5 moe 68773 24832 +6 attention 35604 8192 +6 moe 68773 24832 +7 
attention 35604 8192 +7 moe 68773 24832 +8 attention 35604 8192 +8 moe 68773 24832 +9 attention 35604 8192 +9 moe 68773 24832 +10 attention 35604 8192 +10 moe 68773 24832 +11 attention 35604 8192 +11 moe 68773 24832 +12 attention 35604 8192 +12 moe 68773 24832 +13 attention 35604 8192 +13 moe 68773 24832 +14 attention 35604 8192 +14 moe 68773 24832 +15 attention 35604 8192 +15 moe 68773 24832 +16 attention 35604 8192 +16 moe 68773 24832 +17 attention 35604 8192 +17 moe 68773 24832 +18 attention 35604 8192 +18 moe 68773 24832 +19 attention 35604 8192 +19 moe 68773 24832 +20 attention 35604 8192 +20 moe 68773 24832 +21 attention 35604 8192 +21 moe 68773 24832 +22 attention 35604 8192 +22 moe 68773 24832 +23 attention 35604 8192 +23 moe 68773 24832 +24 attention 35604 8192 +24 moe 68773 24832 +25 attention 35604 8192 +25 moe 68773 24832 +26 attention 35604 8192 +26 moe 68773 24832 +27 attention 35604 8192 +27 moe 68773 24832 +28 attention 35604 8192 +28 moe 68773 24832 +29 attention 35604 8192 +29 moe 68773 24832 +30 attention 35604 8192 +30 moe 68773 24832 +31 attention 35604 8192 +31 moe 68773 24832 +32 attention 35604 8192 +32 moe 68773 24832 +33 attention 35604 8192 +33 moe 68773 24832 +34 attention 35604 8192 +34 moe 68773 24832 +35 attention 35604 8192 +35 moe 68773 24832 +36 attention 35604 8192 +36 moe 68773 24832 +37 attention 35604 8192 +37 moe 68773 24832 +38 attention 35604 8192 +38 moe 68773 24832 +39 attention 35604 8192 +39 moe 68773 24832 +40 attention 35604 8192 +40 moe 68773 24832 +41 attention 35604 8192 +41 moe 68773 24832 +42 attention 35604 8192 +42 moe 68773 24832 +43 attention 35604 8192 +43 moe 68773 24832 +44 attention 35604 8192 +44 moe 68773 24832 +45 attention 35604 8192 +45 moe 68773 24832 +46 attention 35604 8192 +46 moe 68773 24832 +47 attention 35604 8192 +47 moe 68773 24832 +48 attention 35604 8192 +48 moe 68773 24832 +49 attention 35604 8192 +49 moe 68773 24832 +50 attention 35604 8192 +50 moe 68773 24832 +51 attention 35604 8192 +51 
moe 68773 24832 +52 attention 35604 8192 +52 moe 68773 24832 +53 attention 35604 8192 +53 moe 68773 24832 +54 attention 35604 8192 +54 moe 68773 24832 +55 attention 35604 8192 +55 moe 68773 24832 +56 attention 35604 8192 +56 moe 68773 24832 +57 attention 35604 8192 +57 moe 68773 24832 +58 attention 35604 8192 +58 moe 68773 24832 +59 attention 35604 8192 +59 moe 68773 24832 +60 attention 35604 8192 +60 moe 68773 24832 +61 attention 35604 8192 +61 moe 68773 24832 +62 attention 35604 8192 +62 moe 68773 24832 +63 attention 35604 8192 +63 moe 68773 24832 +64 attention 35604 8192 +64 moe 68773 24832 +65 attention 35604 8192 +65 moe 68773 24832 +66 attention 35604 8192 +66 moe 68773 24832 +67 attention 35604 8192 +67 moe 68773 24832 +68 attention 35604 8192 +68 moe 68773 24832 +69 attention 35604 8192 +69 moe 68773 24832 +70 attention 35604 8192 +70 moe 68773 24832 +71 attention 35604 8192 +71 moe 68773 24832 +72 attention 35604 8192 +72 moe 68773 24832 +73 attention 35604 8192 +73 moe 68773 24832 +74 attention 35604 8192 +74 moe 68773 24832 +75 attention 35604 8192 +75 moe 68773 24832 +76 attention 35604 8192 +76 moe 68773 24832 +77 attention 35604 8192 +77 moe 68773 24832 +78 attention 35604 8192 +78 moe 68773 24832 +79 attention 35604 8192 +79 moe 68773 24832 +80 attention 35604 8192 +80 moe 68773 24832 +81 attention 35604 8192 +81 moe 68773 24832 +82 attention 35604 8192 +82 moe 68773 24832 +83 attention 35604 8192 +83 moe 68773 24832 +84 attention 35604 8192 +84 moe 68773 24832 +85 attention 35604 8192 +85 moe 68773 24832 +86 attention 35604 8192 +86 moe 68773 24832 +87 attention 35604 8192 +87 moe 68773 24832 +88 attention 35604 8192 +88 moe 68773 24832 +89 attention 35604 8192 +89 moe 68773 24832 +90 attention 35604 8192 +90 moe 68773 24832 +91 attention 35604 8192 +91 moe 68773 24832 +92 attention 35604 8192 +92 moe 68773 24832 +93 attention 35604 8192 +93 moe 68773 24832 diff --git 
a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size32-tp4-pp1-ep4-bs1-seq100-decode.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size32-tp4-pp1-ep4-bs1-seq100-decode.csv new file mode 100644 index 00000000..856bc528 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size32-tp4-pp1-ep4-bs1-seq100-decode.csv @@ -0,0 +1,189 @@ +layer_id layer_name comp_time comm_size +0 attention 35698 8192 +0 moe 59069 24832 +1 attention 35698 8192 +1 moe 59069 24832 +2 attention 35698 8192 +2 moe 59069 24832 +3 attention 35698 8192 +3 moe 59069 24832 +4 attention 35698 8192 +4 moe 59069 24832 +5 attention 35698 8192 +5 moe 59069 24832 +6 attention 35698 8192 +6 moe 59069 24832 +7 attention 35698 8192 +7 moe 59069 24832 +8 attention 35698 8192 +8 moe 59069 24832 +9 attention 35698 8192 +9 moe 59069 24832 +10 attention 35698 8192 +10 moe 59069 24832 +11 attention 35698 8192 +11 moe 59069 24832 +12 attention 35698 8192 +12 moe 59069 24832 +13 attention 35698 8192 +13 moe 59069 24832 +14 attention 35698 8192 +14 moe 59069 24832 +15 attention 35698 8192 +15 moe 59069 24832 +16 attention 35698 8192 +16 moe 59069 24832 +17 attention 35698 8192 +17 moe 59069 24832 +18 attention 35698 8192 +18 moe 59069 24832 +19 attention 35698 8192 +19 moe 59069 24832 +20 attention 35698 8192 +20 moe 59069 24832 +21 attention 35698 8192 +21 moe 59069 24832 +22 attention 35698 8192 +22 moe 59069 24832 +23 attention 35698 8192 +23 moe 59069 24832 +24 attention 35698 8192 +24 moe 59069 24832 +25 attention 35698 8192 +25 moe 59069 24832 +26 attention 35698 8192 +26 moe 59069 24832 +27 attention 35698 8192 +27 moe 59069 24832 +28 attention 35698 8192 +28 moe 59069 24832 +29 attention 35698 8192 +29 moe 59069 24832 +30 attention 35698 8192 +30 moe 59069 24832 +31 attention 35698 8192 +31 moe 59069 24832 +32 attention 35698 8192 +32 moe 59069 24832 +33 attention 35698 8192 +33 moe 59069 24832 +34 attention 35698 8192 
+34 moe 59069 24832 +35 attention 35698 8192 +35 moe 59069 24832 +36 attention 35698 8192 +36 moe 59069 24832 +37 attention 35698 8192 +37 moe 59069 24832 +38 attention 35698 8192 +38 moe 59069 24832 +39 attention 35698 8192 +39 moe 59069 24832 +40 attention 35698 8192 +40 moe 59069 24832 +41 attention 35698 8192 +41 moe 59069 24832 +42 attention 35698 8192 +42 moe 59069 24832 +43 attention 35698 8192 +43 moe 59069 24832 +44 attention 35698 8192 +44 moe 59069 24832 +45 attention 35698 8192 +45 moe 59069 24832 +46 attention 35698 8192 +46 moe 59069 24832 +47 attention 35698 8192 +47 moe 59069 24832 +48 attention 35698 8192 +48 moe 59069 24832 +49 attention 35698 8192 +49 moe 59069 24832 +50 attention 35698 8192 +50 moe 59069 24832 +51 attention 35698 8192 +51 moe 59069 24832 +52 attention 35698 8192 +52 moe 59069 24832 +53 attention 35698 8192 +53 moe 59069 24832 +54 attention 35698 8192 +54 moe 59069 24832 +55 attention 35698 8192 +55 moe 59069 24832 +56 attention 35698 8192 +56 moe 59069 24832 +57 attention 35698 8192 +57 moe 59069 24832 +58 attention 35698 8192 +58 moe 59069 24832 +59 attention 35698 8192 +59 moe 59069 24832 +60 attention 35698 8192 +60 moe 59069 24832 +61 attention 35698 8192 +61 moe 59069 24832 +62 attention 35698 8192 +62 moe 59069 24832 +63 attention 35698 8192 +63 moe 59069 24832 +64 attention 35698 8192 +64 moe 59069 24832 +65 attention 35698 8192 +65 moe 59069 24832 +66 attention 35698 8192 +66 moe 59069 24832 +67 attention 35698 8192 +67 moe 59069 24832 +68 attention 35698 8192 +68 moe 59069 24832 +69 attention 35698 8192 +69 moe 59069 24832 +70 attention 35698 8192 +70 moe 59069 24832 +71 attention 35698 8192 +71 moe 59069 24832 +72 attention 35698 8192 +72 moe 59069 24832 +73 attention 35698 8192 +73 moe 59069 24832 +74 attention 35698 8192 +74 moe 59069 24832 +75 attention 35698 8192 +75 moe 59069 24832 +76 attention 35698 8192 +76 moe 59069 24832 +77 attention 35698 8192 +77 moe 59069 24832 +78 attention 35698 8192 +78 moe 59069 24832 
+79 attention 35698 8192 +79 moe 59069 24832 +80 attention 35698 8192 +80 moe 59069 24832 +81 attention 35698 8192 +81 moe 59069 24832 +82 attention 35698 8192 +82 moe 59069 24832 +83 attention 35698 8192 +83 moe 59069 24832 +84 attention 35698 8192 +84 moe 59069 24832 +85 attention 35698 8192 +85 moe 59069 24832 +86 attention 35698 8192 +86 moe 59069 24832 +87 attention 35698 8192 +87 moe 59069 24832 +88 attention 35698 8192 +88 moe 59069 24832 +89 attention 35698 8192 +89 moe 59069 24832 +90 attention 35698 8192 +90 moe 59069 24832 +91 attention 35698 8192 +91 moe 59069 24832 +92 attention 35698 8192 +92 moe 59069 24832 +93 attention 35698 8192 +93 moe 59069 24832 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size32-tp4-pp1-ep4-bs1-seq100-prefill.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size32-tp4-pp1-ep4-bs1-seq100-prefill.csv new file mode 100644 index 00000000..ddd95041 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size32-tp4-pp1-ep4-bs1-seq100-prefill.csv @@ -0,0 +1,189 @@ +layer_id layer_name comp_time comm_size +0 attention 44798 819200 +0 moe 182516 2483200 +1 attention 44798 819200 +1 moe 182516 2483200 +2 attention 44798 819200 +2 moe 182516 2483200 +3 attention 44798 819200 +3 moe 182516 2483200 +4 attention 44798 819200 +4 moe 182516 2483200 +5 attention 44798 819200 +5 moe 182516 2483200 +6 attention 44798 819200 +6 moe 182516 2483200 +7 attention 44798 819200 +7 moe 182516 2483200 +8 attention 44798 819200 +8 moe 182516 2483200 +9 attention 44798 819200 +9 moe 182516 2483200 +10 attention 44798 819200 +10 moe 182516 2483200 +11 attention 44798 819200 +11 moe 182516 2483200 +12 attention 44798 819200 +12 moe 182516 2483200 +13 attention 44798 819200 +13 moe 182516 2483200 +14 attention 44798 819200 +14 moe 182516 2483200 +15 attention 44798 819200 +15 moe 182516 2483200 +16 attention 44798 819200 +16 moe 182516 2483200 +17 attention 
44798 819200 +17 moe 182516 2483200 +18 attention 44798 819200 +18 moe 182516 2483200 +19 attention 44798 819200 +19 moe 182516 2483200 +20 attention 44798 819200 +20 moe 182516 2483200 +21 attention 44798 819200 +21 moe 182516 2483200 +22 attention 44798 819200 +22 moe 182516 2483200 +23 attention 44798 819200 +23 moe 182516 2483200 +24 attention 44798 819200 +24 moe 182516 2483200 +25 attention 44798 819200 +25 moe 182516 2483200 +26 attention 44798 819200 +26 moe 182516 2483200 +27 attention 44798 819200 +27 moe 182516 2483200 +28 attention 44798 819200 +28 moe 182516 2483200 +29 attention 44798 819200 +29 moe 182516 2483200 +30 attention 44798 819200 +30 moe 182516 2483200 +31 attention 44798 819200 +31 moe 182516 2483200 +32 attention 44798 819200 +32 moe 182516 2483200 +33 attention 44798 819200 +33 moe 182516 2483200 +34 attention 44798 819200 +34 moe 182516 2483200 +35 attention 44798 819200 +35 moe 182516 2483200 +36 attention 44798 819200 +36 moe 182516 2483200 +37 attention 44798 819200 +37 moe 182516 2483200 +38 attention 44798 819200 +38 moe 182516 2483200 +39 attention 44798 819200 +39 moe 182516 2483200 +40 attention 44798 819200 +40 moe 182516 2483200 +41 attention 44798 819200 +41 moe 182516 2483200 +42 attention 44798 819200 +42 moe 182516 2483200 +43 attention 44798 819200 +43 moe 182516 2483200 +44 attention 44798 819200 +44 moe 182516 2483200 +45 attention 44798 819200 +45 moe 182516 2483200 +46 attention 44798 819200 +46 moe 182516 2483200 +47 attention 44798 819200 +47 moe 182516 2483200 +48 attention 44798 819200 +48 moe 182516 2483200 +49 attention 44798 819200 +49 moe 182516 2483200 +50 attention 44798 819200 +50 moe 182516 2483200 +51 attention 44798 819200 +51 moe 182516 2483200 +52 attention 44798 819200 +52 moe 182516 2483200 +53 attention 44798 819200 +53 moe 182516 2483200 +54 attention 44798 819200 +54 moe 182516 2483200 +55 attention 44798 819200 +55 moe 182516 2483200 +56 attention 44798 819200 +56 moe 182516 2483200 +57 attention 
44798 819200 +57 moe 182516 2483200 +58 attention 44798 819200 +58 moe 182516 2483200 +59 attention 44798 819200 +59 moe 182516 2483200 +60 attention 44798 819200 +60 moe 182516 2483200 +61 attention 44798 819200 +61 moe 182516 2483200 +62 attention 44798 819200 +62 moe 182516 2483200 +63 attention 44798 819200 +63 moe 182516 2483200 +64 attention 44798 819200 +64 moe 182516 2483200 +65 attention 44798 819200 +65 moe 182516 2483200 +66 attention 44798 819200 +66 moe 182516 2483200 +67 attention 44798 819200 +67 moe 182516 2483200 +68 attention 44798 819200 +68 moe 182516 2483200 +69 attention 44798 819200 +69 moe 182516 2483200 +70 attention 44798 819200 +70 moe 182516 2483200 +71 attention 44798 819200 +71 moe 182516 2483200 +72 attention 44798 819200 +72 moe 182516 2483200 +73 attention 44798 819200 +73 moe 182516 2483200 +74 attention 44798 819200 +74 moe 182516 2483200 +75 attention 44798 819200 +75 moe 182516 2483200 +76 attention 44798 819200 +76 moe 182516 2483200 +77 attention 44798 819200 +77 moe 182516 2483200 +78 attention 44798 819200 +78 moe 182516 2483200 +79 attention 44798 819200 +79 moe 182516 2483200 +80 attention 44798 819200 +80 moe 182516 2483200 +81 attention 44798 819200 +81 moe 182516 2483200 +82 attention 44798 819200 +82 moe 182516 2483200 +83 attention 44798 819200 +83 moe 182516 2483200 +84 attention 44798 819200 +84 moe 182516 2483200 +85 attention 44798 819200 +85 moe 182516 2483200 +86 attention 44798 819200 +86 moe 182516 2483200 +87 attention 44798 819200 +87 moe 182516 2483200 +88 attention 44798 819200 +88 moe 182516 2483200 +89 attention 44798 819200 +89 moe 182516 2483200 +90 attention 44798 819200 +90 moe 182516 2483200 +91 attention 44798 819200 +91 moe 182516 2483200 +92 attention 44798 819200 +92 moe 182516 2483200 +93 attention 44798 819200 +93 moe 182516 2483200 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size32-tp4-pp1-ep4-bs1-seq106-decode.csv 
b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size32-tp4-pp1-ep4-bs1-seq106-decode.csv new file mode 100644 index 00000000..717b79ed --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size32-tp4-pp1-ep4-bs1-seq106-decode.csv @@ -0,0 +1,189 @@ +layer_id layer_name comp_time comm_size +0 attention 35941 8192 +0 moe 59133 24832 +1 attention 35941 8192 +1 moe 59133 24832 +2 attention 35941 8192 +2 moe 59133 24832 +3 attention 35941 8192 +3 moe 59133 24832 +4 attention 35941 8192 +4 moe 59133 24832 +5 attention 35941 8192 +5 moe 59133 24832 +6 attention 35941 8192 +6 moe 59133 24832 +7 attention 35941 8192 +7 moe 59133 24832 +8 attention 35941 8192 +8 moe 59133 24832 +9 attention 35941 8192 +9 moe 59133 24832 +10 attention 35941 8192 +10 moe 59133 24832 +11 attention 35941 8192 +11 moe 59133 24832 +12 attention 35941 8192 +12 moe 59133 24832 +13 attention 35941 8192 +13 moe 59133 24832 +14 attention 35941 8192 +14 moe 59133 24832 +15 attention 35941 8192 +15 moe 59133 24832 +16 attention 35941 8192 +16 moe 59133 24832 +17 attention 35941 8192 +17 moe 59133 24832 +18 attention 35941 8192 +18 moe 59133 24832 +19 attention 35941 8192 +19 moe 59133 24832 +20 attention 35941 8192 +20 moe 59133 24832 +21 attention 35941 8192 +21 moe 59133 24832 +22 attention 35941 8192 +22 moe 59133 24832 +23 attention 35941 8192 +23 moe 59133 24832 +24 attention 35941 8192 +24 moe 59133 24832 +25 attention 35941 8192 +25 moe 59133 24832 +26 attention 35941 8192 +26 moe 59133 24832 +27 attention 35941 8192 +27 moe 59133 24832 +28 attention 35941 8192 +28 moe 59133 24832 +29 attention 35941 8192 +29 moe 59133 24832 +30 attention 35941 8192 +30 moe 59133 24832 +31 attention 35941 8192 +31 moe 59133 24832 +32 attention 35941 8192 +32 moe 59133 24832 +33 attention 35941 8192 +33 moe 59133 24832 +34 attention 35941 8192 +34 moe 59133 24832 +35 attention 35941 8192 +35 moe 59133 24832 +36 attention 35941 8192 +36 moe 59133 24832 +37 
attention 35941 8192 +37 moe 59133 24832 +38 attention 35941 8192 +38 moe 59133 24832 +39 attention 35941 8192 +39 moe 59133 24832 +40 attention 35941 8192 +40 moe 59133 24832 +41 attention 35941 8192 +41 moe 59133 24832 +42 attention 35941 8192 +42 moe 59133 24832 +43 attention 35941 8192 +43 moe 59133 24832 +44 attention 35941 8192 +44 moe 59133 24832 +45 attention 35941 8192 +45 moe 59133 24832 +46 attention 35941 8192 +46 moe 59133 24832 +47 attention 35941 8192 +47 moe 59133 24832 +48 attention 35941 8192 +48 moe 59133 24832 +49 attention 35941 8192 +49 moe 59133 24832 +50 attention 35941 8192 +50 moe 59133 24832 +51 attention 35941 8192 +51 moe 59133 24832 +52 attention 35941 8192 +52 moe 59133 24832 +53 attention 35941 8192 +53 moe 59133 24832 +54 attention 35941 8192 +54 moe 59133 24832 +55 attention 35941 8192 +55 moe 59133 24832 +56 attention 35941 8192 +56 moe 59133 24832 +57 attention 35941 8192 +57 moe 59133 24832 +58 attention 35941 8192 +58 moe 59133 24832 +59 attention 35941 8192 +59 moe 59133 24832 +60 attention 35941 8192 +60 moe 59133 24832 +61 attention 35941 8192 +61 moe 59133 24832 +62 attention 35941 8192 +62 moe 59133 24832 +63 attention 35941 8192 +63 moe 59133 24832 +64 attention 35941 8192 +64 moe 59133 24832 +65 attention 35941 8192 +65 moe 59133 24832 +66 attention 35941 8192 +66 moe 59133 24832 +67 attention 35941 8192 +67 moe 59133 24832 +68 attention 35941 8192 +68 moe 59133 24832 +69 attention 35941 8192 +69 moe 59133 24832 +70 attention 35941 8192 +70 moe 59133 24832 +71 attention 35941 8192 +71 moe 59133 24832 +72 attention 35941 8192 +72 moe 59133 24832 +73 attention 35941 8192 +73 moe 59133 24832 +74 attention 35941 8192 +74 moe 59133 24832 +75 attention 35941 8192 +75 moe 59133 24832 +76 attention 35941 8192 +76 moe 59133 24832 +77 attention 35941 8192 +77 moe 59133 24832 +78 attention 35941 8192 +78 moe 59133 24832 +79 attention 35941 8192 +79 moe 59133 24832 +80 attention 35941 8192 +80 moe 59133 24832 +81 attention 35941 
8192 +81 moe 59133 24832 +82 attention 35941 8192 +82 moe 59133 24832 +83 attention 35941 8192 +83 moe 59133 24832 +84 attention 35941 8192 +84 moe 59133 24832 +85 attention 35941 8192 +85 moe 59133 24832 +86 attention 35941 8192 +86 moe 59133 24832 +87 attention 35941 8192 +87 moe 59133 24832 +88 attention 35941 8192 +88 moe 59133 24832 +89 attention 35941 8192 +89 moe 59133 24832 +90 attention 35941 8192 +90 moe 59133 24832 +91 attention 35941 8192 +91 moe 59133 24832 +92 attention 35941 8192 +92 moe 59133 24832 +93 attention 35941 8192 +93 moe 59133 24832 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size8-tp4-pp1-ep8-bs1-seq100-prefill.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size8-tp4-pp1-ep8-bs1-seq100-prefill.csv new file mode 100644 index 00000000..c2b0e519 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Moe-235B-world_size8-tp4-pp1-ep8-bs1-seq100-prefill.csv @@ -0,0 +1,189 @@ +layer_id layer_name comp_time comm_size +0 attention 44945 819200 +0 moe 197980 2483200 +1 attention 44945 819200 +1 moe 197980 2483200 +2 attention 44945 819200 +2 moe 197980 2483200 +3 attention 44945 819200 +3 moe 197980 2483200 +4 attention 44945 819200 +4 moe 197980 2483200 +5 attention 44945 819200 +5 moe 197980 2483200 +6 attention 44945 819200 +6 moe 197980 2483200 +7 attention 44945 819200 +7 moe 197980 2483200 +8 attention 44945 819200 +8 moe 197980 2483200 +9 attention 44945 819200 +9 moe 197980 2483200 +10 attention 44945 819200 +10 moe 197980 2483200 +11 attention 44945 819200 +11 moe 197980 2483200 +12 attention 44945 819200 +12 moe 197980 2483200 +13 attention 44945 819200 +13 moe 197980 2483200 +14 attention 44945 819200 +14 moe 197980 2483200 +15 attention 44945 819200 +15 moe 197980 2483200 +16 attention 44945 819200 +16 moe 197980 2483200 +17 attention 44945 819200 +17 moe 197980 2483200 +18 attention 44945 819200 +18 moe 197980 2483200 +19 attention 44945 819200 
+19 moe 197980 2483200 +20 attention 44945 819200 +20 moe 197980 2483200 +21 attention 44945 819200 +21 moe 197980 2483200 +22 attention 44945 819200 +22 moe 197980 2483200 +23 attention 44945 819200 +23 moe 197980 2483200 +24 attention 44945 819200 +24 moe 197980 2483200 +25 attention 44945 819200 +25 moe 197980 2483200 +26 attention 44945 819200 +26 moe 197980 2483200 +27 attention 44945 819200 +27 moe 197980 2483200 +28 attention 44945 819200 +28 moe 197980 2483200 +29 attention 44945 819200 +29 moe 197980 2483200 +30 attention 44945 819200 +30 moe 197980 2483200 +31 attention 44945 819200 +31 moe 197980 2483200 +32 attention 44945 819200 +32 moe 197980 2483200 +33 attention 44945 819200 +33 moe 197980 2483200 +34 attention 44945 819200 +34 moe 197980 2483200 +35 attention 44945 819200 +35 moe 197980 2483200 +36 attention 44945 819200 +36 moe 197980 2483200 +37 attention 44945 819200 +37 moe 197980 2483200 +38 attention 44945 819200 +38 moe 197980 2483200 +39 attention 44945 819200 +39 moe 197980 2483200 +40 attention 44945 819200 +40 moe 197980 2483200 +41 attention 44945 819200 +41 moe 197980 2483200 +42 attention 44945 819200 +42 moe 197980 2483200 +43 attention 44945 819200 +43 moe 197980 2483200 +44 attention 44945 819200 +44 moe 197980 2483200 +45 attention 44945 819200 +45 moe 197980 2483200 +46 attention 44945 819200 +46 moe 197980 2483200 +47 attention 44945 819200 +47 moe 197980 2483200 +48 attention 44945 819200 +48 moe 197980 2483200 +49 attention 44945 819200 +49 moe 197980 2483200 +50 attention 44945 819200 +50 moe 197980 2483200 +51 attention 44945 819200 +51 moe 197980 2483200 +52 attention 44945 819200 +52 moe 197980 2483200 +53 attention 44945 819200 +53 moe 197980 2483200 +54 attention 44945 819200 +54 moe 197980 2483200 +55 attention 44945 819200 +55 moe 197980 2483200 +56 attention 44945 819200 +56 moe 197980 2483200 +57 attention 44945 819200 +57 moe 197980 2483200 +58 attention 44945 819200 +58 moe 197980 2483200 +59 attention 44945 819200 
+59 moe 197980 2483200 +60 attention 44945 819200 +60 moe 197980 2483200 +61 attention 44945 819200 +61 moe 197980 2483200 +62 attention 44945 819200 +62 moe 197980 2483200 +63 attention 44945 819200 +63 moe 197980 2483200 +64 attention 44945 819200 +64 moe 197980 2483200 +65 attention 44945 819200 +65 moe 197980 2483200 +66 attention 44945 819200 +66 moe 197980 2483200 +67 attention 44945 819200 +67 moe 197980 2483200 +68 attention 44945 819200 +68 moe 197980 2483200 +69 attention 44945 819200 +69 moe 197980 2483200 +70 attention 44945 819200 +70 moe 197980 2483200 +71 attention 44945 819200 +71 moe 197980 2483200 +72 attention 44945 819200 +72 moe 197980 2483200 +73 attention 44945 819200 +73 moe 197980 2483200 +74 attention 44945 819200 +74 moe 197980 2483200 +75 attention 44945 819200 +75 moe 197980 2483200 +76 attention 44945 819200 +76 moe 197980 2483200 +77 attention 44945 819200 +77 moe 197980 2483200 +78 attention 44945 819200 +78 moe 197980 2483200 +79 attention 44945 819200 +79 moe 197980 2483200 +80 attention 44945 819200 +80 moe 197980 2483200 +81 attention 44945 819200 +81 moe 197980 2483200 +82 attention 44945 819200 +82 moe 197980 2483200 +83 attention 44945 819200 +83 moe 197980 2483200 +84 attention 44945 819200 +84 moe 197980 2483200 +85 attention 44945 819200 +85 moe 197980 2483200 +86 attention 44945 819200 +86 moe 197980 2483200 +87 attention 44945 819200 +87 moe 197980 2483200 +88 attention 44945 819200 +88 moe 197980 2483200 +89 attention 44945 819200 +89 moe 197980 2483200 +90 attention 44945 819200 +90 moe 197980 2483200 +91 attention 44945 819200 +91 moe 197980 2483200 +92 attention 44945 819200 +92 moe 197980 2483200 +93 attention 44945 819200 +93 moe 197980 2483200 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size2-tp1-pp1-ep2-bs1-seq100-prefill.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size2-tp1-pp1-ep2-bs1-seq100-prefill.csv new file mode 100644 index 
00000000..95cf3416 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size2-tp1-pp1-ep2-bs1-seq100-prefill.csv @@ -0,0 +1,97 @@ +layer_id layer_name comp_time comm_size +0 attention 45585 0 +0 moe 496646 6208000 +1 attention 45585 0 +1 moe 496646 6208000 +2 attention 45585 0 +2 moe 496646 6208000 +3 attention 53624 0 +3 moe 496646 6208000 +4 attention 45585 0 +4 moe 496646 6208000 +5 attention 45585 0 +5 moe 496646 6208000 +6 attention 45585 0 +6 moe 496646 6208000 +7 attention 53624 0 +7 moe 496646 6208000 +8 attention 45585 0 +8 moe 496646 6208000 +9 attention 45585 0 +9 moe 496646 6208000 +10 attention 45585 0 +10 moe 496646 6208000 +11 attention 53624 0 +11 moe 496646 6208000 +12 attention 45585 0 +12 moe 496646 6208000 +13 attention 45585 0 +13 moe 496646 6208000 +14 attention 45585 0 +14 moe 496646 6208000 +15 attention 53624 0 +15 moe 496646 6208000 +16 attention 45585 0 +16 moe 496646 6208000 +17 attention 45585 0 +17 moe 496646 6208000 +18 attention 45585 0 +18 moe 496646 6208000 +19 attention 53624 0 +19 moe 496646 6208000 +20 attention 45585 0 +20 moe 496646 6208000 +21 attention 45585 0 +21 moe 496646 6208000 +22 attention 45585 0 +22 moe 496646 6208000 +23 attention 53624 0 +23 moe 496646 6208000 +24 attention 45585 0 +24 moe 496646 6208000 +25 attention 45585 0 +25 moe 496646 6208000 +26 attention 45585 0 +26 moe 496646 6208000 +27 attention 53624 0 +27 moe 496646 6208000 +28 attention 45585 0 +28 moe 496646 6208000 +29 attention 45585 0 +29 moe 496646 6208000 +30 attention 45585 0 +30 moe 496646 6208000 +31 attention 53624 0 +31 moe 496646 6208000 +32 attention 45585 0 +32 moe 496646 6208000 +33 attention 45585 0 +33 moe 496646 6208000 +34 attention 45585 0 +34 moe 496646 6208000 +35 attention 53624 0 +35 moe 496646 6208000 +36 attention 45585 0 +36 moe 496646 6208000 +37 attention 45585 0 +37 moe 496646 6208000 +38 attention 45585 0 +38 moe 496646 6208000 +39 attention 53624 0 +39 moe 496646 6208000 +40 
attention 45585 0 +40 moe 496646 6208000 +41 attention 45585 0 +41 moe 496646 6208000 +42 attention 45585 0 +42 moe 496646 6208000 +43 attention 53624 0 +43 moe 496646 6208000 +44 attention 45585 0 +44 moe 496646 6208000 +45 attention 45585 0 +45 moe 496646 6208000 +46 attention 45585 0 +46 moe 496646 6208000 +47 attention 53624 0 +47 moe 496646 6208000 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size32-tp1-pp1-ep32-bs1-seq100-decode.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size32-tp1-pp1-ep32-bs1-seq100-decode.csv new file mode 100644 index 00000000..d5b781db --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size32-tp1-pp1-ep32-bs1-seq100-decode.csv @@ -0,0 +1,97 @@ +layer_id layer_name comp_time comm_size +0 attention 26828 0 +0 moe 47850 62080 +1 attention 26828 0 +1 moe 47850 62080 +2 attention 26828 0 +2 moe 47850 62080 +3 attention 42077 0 +3 moe 47850 62080 +4 attention 26828 0 +4 moe 47850 62080 +5 attention 26828 0 +5 moe 47850 62080 +6 attention 26828 0 +6 moe 47850 62080 +7 attention 42077 0 +7 moe 47850 62080 +8 attention 26828 0 +8 moe 47850 62080 +9 attention 26828 0 +9 moe 47850 62080 +10 attention 26828 0 +10 moe 47850 62080 +11 attention 42077 0 +11 moe 47850 62080 +12 attention 26828 0 +12 moe 47850 62080 +13 attention 26828 0 +13 moe 47850 62080 +14 attention 26828 0 +14 moe 47850 62080 +15 attention 42077 0 +15 moe 47850 62080 +16 attention 26828 0 +16 moe 47850 62080 +17 attention 26828 0 +17 moe 47850 62080 +18 attention 26828 0 +18 moe 47850 62080 +19 attention 42077 0 +19 moe 47850 62080 +20 attention 26828 0 +20 moe 47850 62080 +21 attention 26828 0 +21 moe 47850 62080 +22 attention 26828 0 +22 moe 47850 62080 +23 attention 42077 0 +23 moe 47850 62080 +24 attention 26828 0 +24 moe 47850 62080 +25 attention 26828 0 +25 moe 47850 62080 +26 attention 26828 0 +26 moe 47850 62080 +27 attention 42077 0 +27 moe 47850 62080 +28 
attention 26828 0 +28 moe 47850 62080 +29 attention 26828 0 +29 moe 47850 62080 +30 attention 26828 0 +30 moe 47850 62080 +31 attention 42077 0 +31 moe 47850 62080 +32 attention 26828 0 +32 moe 47850 62080 +33 attention 26828 0 +33 moe 47850 62080 +34 attention 26828 0 +34 moe 47850 62080 +35 attention 42077 0 +35 moe 47850 62080 +36 attention 26828 0 +36 moe 47850 62080 +37 attention 26828 0 +37 moe 47850 62080 +38 attention 26828 0 +38 moe 47850 62080 +39 attention 42077 0 +39 moe 47850 62080 +40 attention 26828 0 +40 moe 47850 62080 +41 attention 26828 0 +41 moe 47850 62080 +42 attention 26828 0 +42 moe 47850 62080 +43 attention 42077 0 +43 moe 47850 62080 +44 attention 26828 0 +44 moe 47850 62080 +45 attention 26828 0 +45 moe 47850 62080 +46 attention 26828 0 +46 moe 47850 62080 +47 attention 42077 0 +47 moe 47850 62080 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size32-tp1-pp1-ep32-bs1-seq106-decode.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size32-tp1-pp1-ep32-bs1-seq106-decode.csv new file mode 100644 index 00000000..5f792bae --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size32-tp1-pp1-ep32-bs1-seq106-decode.csv @@ -0,0 +1,97 @@ +layer_id layer_name comp_time comm_size +0 attention 27045 0 +0 moe 47979 62080 +1 attention 27045 0 +1 moe 47979 62080 +2 attention 27045 0 +2 moe 47979 62080 +3 attention 41999 0 +3 moe 47979 62080 +4 attention 27045 0 +4 moe 47979 62080 +5 attention 27045 0 +5 moe 47979 62080 +6 attention 27045 0 +6 moe 47979 62080 +7 attention 41999 0 +7 moe 47979 62080 +8 attention 27045 0 +8 moe 47979 62080 +9 attention 27045 0 +9 moe 47979 62080 +10 attention 27045 0 +10 moe 47979 62080 +11 attention 41999 0 +11 moe 47979 62080 +12 attention 27045 0 +12 moe 47979 62080 +13 attention 27045 0 +13 moe 47979 62080 +14 attention 27045 0 +14 moe 47979 62080 +15 attention 41999 0 +15 moe 47979 62080 +16 attention 27045 0 +16 moe 
47979 62080 +17 attention 27045 0 +17 moe 47979 62080 +18 attention 27045 0 +18 moe 47979 62080 +19 attention 41999 0 +19 moe 47979 62080 +20 attention 27045 0 +20 moe 47979 62080 +21 attention 27045 0 +21 moe 47979 62080 +22 attention 27045 0 +22 moe 47979 62080 +23 attention 41999 0 +23 moe 47979 62080 +24 attention 27045 0 +24 moe 47979 62080 +25 attention 27045 0 +25 moe 47979 62080 +26 attention 27045 0 +26 moe 47979 62080 +27 attention 41999 0 +27 moe 47979 62080 +28 attention 27045 0 +28 moe 47979 62080 +29 attention 27045 0 +29 moe 47979 62080 +30 attention 27045 0 +30 moe 47979 62080 +31 attention 41999 0 +31 moe 47979 62080 +32 attention 27045 0 +32 moe 47979 62080 +33 attention 27045 0 +33 moe 47979 62080 +34 attention 27045 0 +34 moe 47979 62080 +35 attention 41999 0 +35 moe 47979 62080 +36 attention 27045 0 +36 moe 47979 62080 +37 attention 27045 0 +37 moe 47979 62080 +38 attention 27045 0 +38 moe 47979 62080 +39 attention 41999 0 +39 moe 47979 62080 +40 attention 27045 0 +40 moe 47979 62080 +41 attention 27045 0 +41 moe 47979 62080 +42 attention 27045 0 +42 moe 47979 62080 +43 attention 41999 0 +43 moe 47979 62080 +44 attention 27045 0 +44 moe 47979 62080 +45 attention 27045 0 +45 moe 47979 62080 +46 attention 27045 0 +46 moe 47979 62080 +47 attention 41999 0 +47 moe 47979 62080 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size6-tp1-pp1-ep6-bs1-seq100-decode.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size6-tp1-pp1-ep6-bs1-seq100-decode.csv new file mode 100644 index 00000000..ccfea378 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size6-tp1-pp1-ep6-bs1-seq100-decode.csv @@ -0,0 +1,97 @@ +layer_id layer_name comp_time comm_size +0 attention 26930 0 +0 moe 132351 62080 +1 attention 26930 0 +1 moe 132351 62080 +2 attention 26930 0 +2 moe 132351 62080 +3 attention 41834 0 +3 moe 132351 62080 +4 attention 26930 0 +4 moe 132351 62080 +5 
attention 26930 0 +5 moe 132351 62080 +6 attention 26930 0 +6 moe 132351 62080 +7 attention 41834 0 +7 moe 132351 62080 +8 attention 26930 0 +8 moe 132351 62080 +9 attention 26930 0 +9 moe 132351 62080 +10 attention 26930 0 +10 moe 132351 62080 +11 attention 41834 0 +11 moe 132351 62080 +12 attention 26930 0 +12 moe 132351 62080 +13 attention 26930 0 +13 moe 132351 62080 +14 attention 26930 0 +14 moe 132351 62080 +15 attention 41834 0 +15 moe 132351 62080 +16 attention 26930 0 +16 moe 132351 62080 +17 attention 26930 0 +17 moe 132351 62080 +18 attention 26930 0 +18 moe 132351 62080 +19 attention 41834 0 +19 moe 132351 62080 +20 attention 26930 0 +20 moe 132351 62080 +21 attention 26930 0 +21 moe 132351 62080 +22 attention 26930 0 +22 moe 132351 62080 +23 attention 41834 0 +23 moe 132351 62080 +24 attention 26930 0 +24 moe 132351 62080 +25 attention 26930 0 +25 moe 132351 62080 +26 attention 26930 0 +26 moe 132351 62080 +27 attention 41834 0 +27 moe 132351 62080 +28 attention 26930 0 +28 moe 132351 62080 +29 attention 26930 0 +29 moe 132351 62080 +30 attention 26930 0 +30 moe 132351 62080 +31 attention 41834 0 +31 moe 132351 62080 +32 attention 26930 0 +32 moe 132351 62080 +33 attention 26930 0 +33 moe 132351 62080 +34 attention 26930 0 +34 moe 132351 62080 +35 attention 41834 0 +35 moe 132351 62080 +36 attention 26930 0 +36 moe 132351 62080 +37 attention 26930 0 +37 moe 132351 62080 +38 attention 26930 0 +38 moe 132351 62080 +39 attention 41834 0 +39 moe 132351 62080 +40 attention 26930 0 +40 moe 132351 62080 +41 attention 26930 0 +41 moe 132351 62080 +42 attention 26930 0 +42 moe 132351 62080 +43 attention 41834 0 +43 moe 132351 62080 +44 attention 26930 0 +44 moe 132351 62080 +45 attention 26930 0 +45 moe 132351 62080 +46 attention 26930 0 +46 moe 132351 62080 +47 attention 41834 0 +47 moe 132351 62080 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size6-tp1-pp1-ep6-bs1-seq106-decode.csv 
b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size6-tp1-pp1-ep6-bs1-seq106-decode.csv new file mode 100644 index 00000000..2cb17e94 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size6-tp1-pp1-ep6-bs1-seq106-decode.csv @@ -0,0 +1,97 @@ +layer_id layer_name comp_time comm_size +0 attention 26979 0 +0 moe 132058 62080 +1 attention 26979 0 +1 moe 132058 62080 +2 attention 26979 0 +2 moe 132058 62080 +3 attention 41884 0 +3 moe 132058 62080 +4 attention 26979 0 +4 moe 132058 62080 +5 attention 26979 0 +5 moe 132058 62080 +6 attention 26979 0 +6 moe 132058 62080 +7 attention 41884 0 +7 moe 132058 62080 +8 attention 26979 0 +8 moe 132058 62080 +9 attention 26979 0 +9 moe 132058 62080 +10 attention 26979 0 +10 moe 132058 62080 +11 attention 41884 0 +11 moe 132058 62080 +12 attention 26979 0 +12 moe 132058 62080 +13 attention 26979 0 +13 moe 132058 62080 +14 attention 26979 0 +14 moe 132058 62080 +15 attention 41884 0 +15 moe 132058 62080 +16 attention 26979 0 +16 moe 132058 62080 +17 attention 26979 0 +17 moe 132058 62080 +18 attention 26979 0 +18 moe 132058 62080 +19 attention 41884 0 +19 moe 132058 62080 +20 attention 26979 0 +20 moe 132058 62080 +21 attention 26979 0 +21 moe 132058 62080 +22 attention 26979 0 +22 moe 132058 62080 +23 attention 41884 0 +23 moe 132058 62080 +24 attention 26979 0 +24 moe 132058 62080 +25 attention 26979 0 +25 moe 132058 62080 +26 attention 26979 0 +26 moe 132058 62080 +27 attention 41884 0 +27 moe 132058 62080 +28 attention 26979 0 +28 moe 132058 62080 +29 attention 26979 0 +29 moe 132058 62080 +30 attention 26979 0 +30 moe 132058 62080 +31 attention 41884 0 +31 moe 132058 62080 +32 attention 26979 0 +32 moe 132058 62080 +33 attention 26979 0 +33 moe 132058 62080 +34 attention 26979 0 +34 moe 132058 62080 +35 attention 41884 0 +35 moe 132058 62080 +36 attention 26979 0 +36 moe 132058 62080 +37 attention 26979 0 +37 moe 132058 62080 +38 attention 26979 0 +38 moe 132058 
62080 +39 attention 41884 0 +39 moe 132058 62080 +40 attention 26979 0 +40 moe 132058 62080 +41 attention 26979 0 +41 moe 132058 62080 +42 attention 26979 0 +42 moe 132058 62080 +43 attention 41884 0 +43 moe 132058 62080 +44 attention 26979 0 +44 moe 132058 62080 +45 attention 26979 0 +45 moe 132058 62080 +46 attention 26979 0 +46 moe 132058 62080 +47 attention 41884 0 +47 moe 132058 62080 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size8-tp1-pp1-ep8-bs1-seq100-decode.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size8-tp1-pp1-ep8-bs1-seq100-decode.csv new file mode 100644 index 00000000..d7d4bd1a --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size8-tp1-pp1-ep8-bs1-seq100-decode.csv @@ -0,0 +1,97 @@ +layer_id layer_name comp_time comm_size +0 attention 26734 0 +0 moe 113280 62080 +1 attention 26734 0 +1 moe 113280 62080 +2 attention 26734 0 +2 moe 113280 62080 +3 attention 42127 0 +3 moe 113280 62080 +4 attention 26734 0 +4 moe 113280 62080 +5 attention 26734 0 +5 moe 113280 62080 +6 attention 26734 0 +6 moe 113280 62080 +7 attention 42127 0 +7 moe 113280 62080 +8 attention 26734 0 +8 moe 113280 62080 +9 attention 26734 0 +9 moe 113280 62080 +10 attention 26734 0 +10 moe 113280 62080 +11 attention 42127 0 +11 moe 113280 62080 +12 attention 26734 0 +12 moe 113280 62080 +13 attention 26734 0 +13 moe 113280 62080 +14 attention 26734 0 +14 moe 113280 62080 +15 attention 42127 0 +15 moe 113280 62080 +16 attention 26734 0 +16 moe 113280 62080 +17 attention 26734 0 +17 moe 113280 62080 +18 attention 26734 0 +18 moe 113280 62080 +19 attention 42127 0 +19 moe 113280 62080 +20 attention 26734 0 +20 moe 113280 62080 +21 attention 26734 0 +21 moe 113280 62080 +22 attention 26734 0 +22 moe 113280 62080 +23 attention 42127 0 +23 moe 113280 62080 +24 attention 26734 0 +24 moe 113280 62080 +25 attention 26734 0 +25 moe 113280 62080 +26 attention 26734 0 +26 moe 113280 
62080 +27 attention 42127 0 +27 moe 113280 62080 +28 attention 26734 0 +28 moe 113280 62080 +29 attention 26734 0 +29 moe 113280 62080 +30 attention 26734 0 +30 moe 113280 62080 +31 attention 42127 0 +31 moe 113280 62080 +32 attention 26734 0 +32 moe 113280 62080 +33 attention 26734 0 +33 moe 113280 62080 +34 attention 26734 0 +34 moe 113280 62080 +35 attention 42127 0 +35 moe 113280 62080 +36 attention 26734 0 +36 moe 113280 62080 +37 attention 26734 0 +37 moe 113280 62080 +38 attention 26734 0 +38 moe 113280 62080 +39 attention 42127 0 +39 moe 113280 62080 +40 attention 26734 0 +40 moe 113280 62080 +41 attention 26734 0 +41 moe 113280 62080 +42 attention 26734 0 +42 moe 113280 62080 +43 attention 42127 0 +43 moe 113280 62080 +44 attention 26734 0 +44 moe 113280 62080 +45 attention 26734 0 +45 moe 113280 62080 +46 attention 26734 0 +46 moe 113280 62080 +47 attention 42127 0 +47 moe 113280 62080 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size8-tp1-pp1-ep8-bs1-seq100-prefill.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size8-tp1-pp1-ep8-bs1-seq100-prefill.csv new file mode 100644 index 00000000..d90195a4 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size8-tp1-pp1-ep8-bs1-seq100-prefill.csv @@ -0,0 +1,97 @@ +layer_id layer_name comp_time comm_size +0 attention 45839 0 +0 moe 146500 6208000 +1 attention 45839 0 +1 moe 146500 6208000 +2 attention 45839 0 +2 moe 146500 6208000 +3 attention 53616 0 +3 moe 146500 6208000 +4 attention 45839 0 +4 moe 146500 6208000 +5 attention 45839 0 +5 moe 146500 6208000 +6 attention 45839 0 +6 moe 146500 6208000 +7 attention 53616 0 +7 moe 146500 6208000 +8 attention 45839 0 +8 moe 146500 6208000 +9 attention 45839 0 +9 moe 146500 6208000 +10 attention 45839 0 +10 moe 146500 6208000 +11 attention 53616 0 +11 moe 146500 6208000 +12 attention 45839 0 +12 moe 146500 6208000 +13 attention 45839 0 +13 moe 146500 6208000 +14 
attention 45839 0 +14 moe 146500 6208000 +15 attention 53616 0 +15 moe 146500 6208000 +16 attention 45839 0 +16 moe 146500 6208000 +17 attention 45839 0 +17 moe 146500 6208000 +18 attention 45839 0 +18 moe 146500 6208000 +19 attention 53616 0 +19 moe 146500 6208000 +20 attention 45839 0 +20 moe 146500 6208000 +21 attention 45839 0 +21 moe 146500 6208000 +22 attention 45839 0 +22 moe 146500 6208000 +23 attention 53616 0 +23 moe 146500 6208000 +24 attention 45839 0 +24 moe 146500 6208000 +25 attention 45839 0 +25 moe 146500 6208000 +26 attention 45839 0 +26 moe 146500 6208000 +27 attention 53616 0 +27 moe 146500 6208000 +28 attention 45839 0 +28 moe 146500 6208000 +29 attention 45839 0 +29 moe 146500 6208000 +30 attention 45839 0 +30 moe 146500 6208000 +31 attention 53616 0 +31 moe 146500 6208000 +32 attention 45839 0 +32 moe 146500 6208000 +33 attention 45839 0 +33 moe 146500 6208000 +34 attention 45839 0 +34 moe 146500 6208000 +35 attention 53616 0 +35 moe 146500 6208000 +36 attention 45839 0 +36 moe 146500 6208000 +37 attention 45839 0 +37 moe 146500 6208000 +38 attention 45839 0 +38 moe 146500 6208000 +39 attention 53616 0 +39 moe 146500 6208000 +40 attention 45839 0 +40 moe 146500 6208000 +41 attention 45839 0 +41 moe 146500 6208000 +42 attention 45839 0 +42 moe 146500 6208000 +43 attention 53616 0 +43 moe 146500 6208000 +44 attention 45839 0 +44 moe 146500 6208000 +45 attention 45839 0 +45 moe 146500 6208000 +46 attention 45839 0 +46 moe 146500 6208000 +47 attention 53616 0 +47 moe 146500 6208000 diff --git a/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size8-tp1-pp1-ep8-bs1-seq106-decode.csv b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size8-tp1-pp1-ep8-bs1-seq106-decode.csv new file mode 100644 index 00000000..5913296e --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/cache/vidur-Qwen3-Next-80B-world_size8-tp1-pp1-ep8-bs1-seq106-decode.csv @@ -0,0 +1,97 @@ +layer_id layer_name comp_time comm_size 
+0 attention 26914 0 +0 moe 113099 62080 +1 attention 26914 0 +1 moe 113099 62080 +2 attention 26914 0 +2 moe 113099 62080 +3 attention 42109 0 +3 moe 113099 62080 +4 attention 26914 0 +4 moe 113099 62080 +5 attention 26914 0 +5 moe 113099 62080 +6 attention 26914 0 +6 moe 113099 62080 +7 attention 42109 0 +7 moe 113099 62080 +8 attention 26914 0 +8 moe 113099 62080 +9 attention 26914 0 +9 moe 113099 62080 +10 attention 26914 0 +10 moe 113099 62080 +11 attention 42109 0 +11 moe 113099 62080 +12 attention 26914 0 +12 moe 113099 62080 +13 attention 26914 0 +13 moe 113099 62080 +14 attention 26914 0 +14 moe 113099 62080 +15 attention 42109 0 +15 moe 113099 62080 +16 attention 26914 0 +16 moe 113099 62080 +17 attention 26914 0 +17 moe 113099 62080 +18 attention 26914 0 +18 moe 113099 62080 +19 attention 42109 0 +19 moe 113099 62080 +20 attention 26914 0 +20 moe 113099 62080 +21 attention 26914 0 +21 moe 113099 62080 +22 attention 26914 0 +22 moe 113099 62080 +23 attention 42109 0 +23 moe 113099 62080 +24 attention 26914 0 +24 moe 113099 62080 +25 attention 26914 0 +25 moe 113099 62080 +26 attention 26914 0 +26 moe 113099 62080 +27 attention 42109 0 +27 moe 113099 62080 +28 attention 26914 0 +28 moe 113099 62080 +29 attention 26914 0 +29 moe 113099 62080 +30 attention 26914 0 +30 moe 113099 62080 +31 attention 42109 0 +31 moe 113099 62080 +32 attention 26914 0 +32 moe 113099 62080 +33 attention 26914 0 +33 moe 113099 62080 +34 attention 26914 0 +34 moe 113099 62080 +35 attention 42109 0 +35 moe 113099 62080 +36 attention 26914 0 +36 moe 113099 62080 +37 attention 26914 0 +37 moe 113099 62080 +38 attention 26914 0 +38 moe 113099 62080 +39 attention 42109 0 +39 moe 113099 62080 +40 attention 26914 0 +40 moe 113099 62080 +41 attention 26914 0 +41 moe 113099 62080 +42 attention 26914 0 +42 moe 113099 62080 +43 attention 42109 0 +43 moe 113099 62080 +44 attention 26914 0 +44 moe 113099 62080 +45 attention 26914 0 +45 moe 113099 62080 +46 attention 26914 0 +46 moe 113099 
62080 +47 attention 42109 0 +47 moe 113099 62080 diff --git a/vidur-alibabacloud/data/aicb_workload/logs/aicb_cache_log.txt b/vidur-alibabacloud/data/aicb_workload/logs/aicb_cache_log.txt new file mode 100644 index 00000000..23402822 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/logs/aicb_cache_log.txt @@ -0,0 +1,5601 @@ +[2026-02-09 08:27:29] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-09 08:28:29] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-09 08:29:29] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-09 08:30:29] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-09 08:37:58] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-09 08:38:03] [未命中] key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill +[2026-02-09 08:38:03] [缓存] key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, layers=94 +[2026-02-09 08:38:03] [未命中] key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode +[2026-02-09 08:38:03] [缓存] key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, layers=94 +[2026-02-09 08:38:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-02-09 08:38:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-09 08:38:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=3 +[2026-02-09 08:38:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=4 +[2026-02-09 08:38:03] [命中] 精确匹配: 
key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=5 +[2026-02-09 08:38:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=6 +[2026-02-09 08:38:03] 统计: hits=6, interp=0, calls=0, entries=2 +[2026-02-09 08:38:03] 查表索引已保存到 /disk1/futianhao/software02/simai-mem/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-09 16:00:50] 从磁盘加载了 2 条缓存记录 +[2026-02-09 16:00:50] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-09 16:00:57] 最近邻: seq=101, nearest=100 +[2026-02-09 16:00:57] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-09 16:00:57] 最近邻: seq=102, nearest=101 +[2026-02-09 16:00:57] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-09 16:00:57] 最近邻: seq=103, nearest=102 +[2026-02-09 16:00:57] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-09 16:00:57] 最近邻: seq=104, nearest=103 +[2026-02-09 16:00:57] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-09 16:00:57] 最近邻: seq=105, nearest=104 +[2026-02-09 16:00:57] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-09 16:00:57] 最近邻: seq=106, nearest=105 +[2026-02-09 16:00:57] [插值] seq=106, 使用邻居插值, 总插值=6 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=3 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=4 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode, 总命中=5 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode, 总命中=6 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, 
seq=103, phase=decode, 总命中=7 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode, 总命中=8 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode, 总命中=9 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=10 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=11 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=12 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode, 总命中=13 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode, 总命中=14 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode, 总命中=15 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode, 总命中=16 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode, 总命中=17 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=18 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=19 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=20 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode, 总命中=21 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode, 总命中=22 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode, 总命中=23 +[2026-02-09 16:00:57] [命中] 精确匹配: 
key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode, 总命中=24 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode, 总命中=25 +[2026-02-09 16:00:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=26 +[2026-02-09 16:04:26] 从磁盘加载了 2 条缓存记录 +[2026-02-09 16:04:26] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-09 16:04:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-02-09 16:04:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-09 16:04:33] [AICB调用] 第1次subprocess调用 +[2026-02-09 16:05:04] [缓存] key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, layers=94 +[2026-02-09 16:05:04] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-09 16:05:04] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-09 16:05:04] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-09 16:05:04] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-09 16:05:04] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-09 16:05:04] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-09 16:05:04] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-09 16:05:04] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-09 16:05:04] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-09 16:05:04] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=3 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=5 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, 
phase=decode, 总命中=6 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode, 总命中=7 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode, 总命中=8 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode, 总命中=9 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode, 总命中=10 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=11 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=13 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode, 总命中=14 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode, 总命中=15 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode, 总命中=16 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode, 总命中=17 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode, 总命中=18 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=19 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=21 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode, 总命中=22 +[2026-02-09 16:05:04] [命中] 精确匹配: 
key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode, 总命中=23 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode, 总命中=24 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode, 总命中=25 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode, 总命中=26 +[2026-02-09 16:05:04] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=27 +[2026-02-09 16:05:04] 统计: hits=27, interp=5, calls=1, entries=8 +[2026-02-09 16:05:04] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-10 02:27:18] 从磁盘加载了 3 条缓存记录 +[2026-02-10 02:27:18] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-10 02:27:25] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-10 02:27:25] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-10 02:27:25] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-10 02:27:25] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-10 02:27:25] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-10 02:27:25] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-10 02:27:25] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-10 02:27:25] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-10 02:27:25] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-10 02:27:25] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=3 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, 
tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=5 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode, 总命中=6 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode, 总命中=7 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode, 总命中=8 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode, 总命中=9 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode, 总命中=10 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=11 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=13 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode, 总命中=14 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode, 总命中=15 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode, 总命中=16 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode, 总命中=17 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode, 总命中=18 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=19 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-02-10 
02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=21 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode, 总命中=22 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode, 总命中=23 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode, 总命中=24 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode, 总命中=25 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode, 总命中=26 +[2026-02-10 02:27:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=27 +[2026-02-10 02:27:25] 统计: hits=27, interp=5, calls=0, entries=8 +[2026-02-10 02:27:25] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-10 07:07:17] 从磁盘加载了 3 条缓存记录 +[2026-02-10 07:07:17] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-10 07:07:24] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill +[2026-02-10 07:07:24] [AICB调用] 第1次subprocess调用 +[2026-02-10 07:12:24] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode +[2026-02-10 07:12:24] [AICB调用] 第2次subprocess调用 +[2026-02-10 07:17:25] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode +[2026-02-10 07:17:25] [AICB调用] 第3次subprocess调用 +[2026-02-10 07:22:25] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode +[2026-02-10 07:22:25] [AICB调用] 第4次subprocess调用 +[2026-02-10 07:27:25] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode +[2026-02-10 07:27:25] [AICB调用] 
第5次subprocess调用 +[2026-02-10 07:32:25] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode +[2026-02-10 07:32:25] [AICB调用] 第6次subprocess调用 +[2026-02-10 07:37:26] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode +[2026-02-10 07:37:26] [AICB调用] 第7次subprocess调用 +[2026-02-10 07:42:27] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode +[2026-02-10 07:42:27] [AICB调用] 第8次subprocess调用 +[2026-02-10 07:47:27] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill +[2026-02-10 07:47:27] [AICB调用] 第9次subprocess调用 +[2026-02-10 07:52:27] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode +[2026-02-10 07:52:27] [AICB调用] 第10次subprocess调用 +[2026-02-10 07:57:27] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode +[2026-02-10 07:57:27] [AICB调用] 第11次subprocess调用 +[2026-02-10 08:02:28] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode +[2026-02-10 08:02:28] [AICB调用] 第12次subprocess调用 +[2026-02-10 08:07:28] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode +[2026-02-10 08:07:28] [AICB调用] 第13次subprocess调用 +[2026-02-10 08:12:28] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode +[2026-02-10 08:12:28] [AICB调用] 第14次subprocess调用 +[2026-02-10 08:17:28] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode +[2026-02-10 08:17:28] [AICB调用] 第15次subprocess调用 +[2026-02-10 08:22:28] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode +[2026-02-10 08:22:28] [AICB调用] 第16次subprocess调用 +[2026-02-10 08:27:28] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill +[2026-02-10 08:27:28] [AICB调用] 第17次subprocess调用 +[2026-02-10 08:32:28] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, 
phase=decode +[2026-02-10 08:32:28] [AICB调用] 第18次subprocess调用 +[2026-02-10 08:37:28] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode +[2026-02-10 08:37:28] [AICB调用] 第19次subprocess调用 +[2026-02-10 08:42:28] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode +[2026-02-10 08:42:28] [AICB调用] 第20次subprocess调用 +[2026-02-10 08:47:28] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode +[2026-02-10 08:47:28] [AICB调用] 第21次subprocess调用 +[2026-02-10 08:52:29] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode +[2026-02-10 08:52:29] [AICB调用] 第22次subprocess调用 +[2026-02-10 08:57:29] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode +[2026-02-10 08:57:29] [AICB调用] 第23次subprocess调用 +[2026-02-10 09:02:29] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode +[2026-02-10 09:02:29] [AICB调用] 第24次subprocess调用 +[2026-02-10 09:07:29] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill +[2026-02-10 09:07:29] [AICB调用] 第25次subprocess调用 +[2026-02-13 10:57:53] 从磁盘加载了 3 条缓存记录 +[2026-02-13 10:57:53] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-13 10:57:59] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill +[2026-02-13 10:57:59] [AICB调用] 第1次subprocess调用 +[2026-02-13 10:58:49] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode +[2026-02-13 10:58:49] [AICB调用] 第2次subprocess调用 +[2026-02-13 10:58:52] 从磁盘加载了 3 条缓存记录 +[2026-02-13 10:58:52] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-02-13 10:58:59] [命中] 精确匹配: 
key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-13 10:58:59] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-13 10:58:59] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-13 10:58:59] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-13 10:58:59] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-13 10:58:59] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-13 10:58:59] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-13 10:58:59] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-13 10:58:59] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-13 10:58:59] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-13 10:58:59] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=3 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=5 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode, 总命中=6 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode, 总命中=7 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode, 总命中=8 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode, 总命中=9 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode, 总命中=10 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=11 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, 
seq=100, phase=decode, 总命中=13 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode, 总命中=14 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode, 总命中=15 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode, 总命中=16 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode, 总命中=17 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode, 总命中=18 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=19 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode, 总命中=21 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=101, phase=decode, 总命中=22 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=102, phase=decode, 总命中=23 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=103, phase=decode, 总命中=24 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=104, phase=decode, 总命中=25 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=105, phase=decode, 总命中=26 +[2026-02-13 10:58:59] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode, 总命中=27 +[2026-02-13 10:58:59] 统计: hits=27, interp=5, calls=0, entries=8 +[2026-02-13 10:58:59] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-13 10:59:41] 从磁盘加载了 3 条缓存记录 +[2026-02-13 10:59:41] 
AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-13 10:59:48] [未命中] key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=prefill +[2026-02-13 10:59:48] [缓存] key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=prefill, layers=48 +[2026-02-13 10:59:48] [未命中] key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=decode +[2026-02-13 10:59:48] [AICB调用] 第1次subprocess调用 +[2026-02-13 11:00:08] [缓存] key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=decode, layers=48 +[2026-02-13 11:00:08] [AICB调用] 第2次subprocess调用 +[2026-02-13 11:01:04] 从磁盘加载了 5 条缓存记录 +[2026-02-13 11:01:04] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-13 11:01:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-02-13 11:01:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-13 11:01:11] [AICB调用] 第1次subprocess调用 +[2026-02-13 11:01:30] [缓存] key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=106, phase=decode, layers=48 +[2026-02-13 11:01:30] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-13 11:01:30] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-13 11:01:30] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-13 11:01:30] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-13 11:01:30] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-13 11:01:30] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-13 11:01:30] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-13 11:01:30] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-13 11:01:30] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-13 11:01:30] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=106, phase=decode, 总命中=3 +[2026-02-13 
11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=decode, 总命中=5 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=101, phase=decode, 总命中=6 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=102, phase=decode, 总命中=7 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=103, phase=decode, 总命中=8 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=104, phase=decode, 总命中=9 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=105, phase=decode, 总命中=10 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=106, phase=decode, 总命中=11 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=decode, 总命中=13 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=101, phase=decode, 总命中=14 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=102, phase=decode, 总命中=15 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=103, phase=decode, 总命中=16 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=104, phase=decode, 总命中=17 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=105, phase=decode, 总命中=18 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=106, phase=decode, 总命中=19 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, 
seq=100, phase=prefill, 总命中=20 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=decode, 总命中=21 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=101, phase=decode, 总命中=22 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=102, phase=decode, 总命中=23 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=103, phase=decode, 总命中=24 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=104, phase=decode, 总命中=25 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=105, phase=decode, 总命中=26 +[2026-02-13 11:01:30] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=106, phase=decode, 总命中=27 +[2026-02-13 11:01:30] 统计: hits=27, interp=5, calls=1, entries=11 +[2026-02-13 11:01:30] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-13 11:02:10] 从磁盘加载了 6 条缓存记录 +[2026-02-13 11:02:10] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-13 11:02:17] [未命中] key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=100, phase=prefill +[2026-02-13 11:02:17] [AICB调用] 第1次subprocess调用 +[2026-02-13 11:02:26] [未命中] key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=100, phase=prefill +[2026-02-13 11:02:26] [AICB调用] 第2次subprocess调用 +[2026-02-13 11:02:35] [未命中] key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=100, phase=decode +[2026-02-13 11:02:35] [AICB调用] 第3次subprocess调用 +[2026-02-13 11:02:46] [缓存] key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=100, phase=decode, layers=61 +[2026-02-13 11:02:46] [AICB调用] 第4次subprocess调用 +[2026-02-13 11:02:57] [缓存] key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=106, phase=decode, 
layers=61 +[2026-02-13 11:02:57] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-13 11:02:57] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-13 11:02:57] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-13 11:02:57] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-13 11:02:57] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-13 11:02:57] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-13 11:02:57] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-13 11:02:57] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-13 11:02:57] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-13 11:02:57] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-13 11:02:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=106, phase=decode, 总命中=1 +[2026-02-13 11:02:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-13 11:02:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=101, phase=decode, 总命中=3 +[2026-02-13 11:02:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=102, phase=decode, 总命中=4 +[2026-02-13 11:02:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=103, phase=decode, 总命中=5 +[2026-02-13 11:02:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=104, phase=decode, 总命中=6 +[2026-02-13 11:02:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=105, phase=decode, 总命中=7 +[2026-02-13 11:02:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=106, phase=decode, 总命中=8 +[2026-02-13 11:02:57] [未命中] key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=100, phase=prefill +[2026-02-13 11:02:57] [AICB调用] 第5次subprocess调用 +[2026-02-13 11:03:07] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=100, phase=decode, 总命中=9 +[2026-02-13 11:03:07] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=101, phase=decode, 总命中=10 +[2026-02-13 11:03:07] [命中] 
精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=102, phase=decode, 总命中=11 +[2026-02-13 11:03:07] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=103, phase=decode, 总命中=12 +[2026-02-13 11:03:07] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=104, phase=decode, 总命中=13 +[2026-02-13 11:03:07] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=105, phase=decode, 总命中=14 +[2026-02-13 11:03:07] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=106, phase=decode, 总命中=15 +[2026-02-13 11:03:07] [未命中] key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=100, phase=prefill +[2026-02-13 11:03:07] [AICB调用] 第6次subprocess调用 +[2026-02-13 11:03:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=100, phase=decode, 总命中=16 +[2026-02-13 11:03:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=101, phase=decode, 总命中=17 +[2026-02-13 11:03:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=102, phase=decode, 总命中=18 +[2026-02-13 11:03:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=103, phase=decode, 总命中=19 +[2026-02-13 11:03:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=104, phase=decode, 总命中=20 +[2026-02-13 11:03:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=105, phase=decode, 总命中=21 +[2026-02-13 11:03:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=106, phase=decode, 总命中=22 +[2026-02-13 11:03:16] 统计: hits=22, interp=5, calls=6, entries=13 +[2026-02-13 11:03:16] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-16 05:40:13] 从磁盘加载了 8 条缓存记录 +[2026-02-16 05:40:13] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 05:40:49] 从磁盘加载了 8 条缓存记录 
+[2026-02-16 05:40:49] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 06:29:28] 从磁盘加载了 8 条缓存记录 +[2026-02-16 06:29:28] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 06:46:11] 从磁盘加载了 8 条缓存记录 +[2026-02-16 06:46:11] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 06:46:15] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode +[2026-02-16 06:46:15] [AICB调用] 第1次subprocess调用 +[2026-02-16 06:51:15] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode +[2026-02-16 06:51:15] [AICB调用] 第2次subprocess调用 +[2026-02-16 06:51:35] [缓存] key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, layers=48 +[2026-02-16 06:51:35] [AICB调用] 第3次subprocess调用 +[2026-02-16 06:51:54] [缓存] key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, layers=48 +[2026-02-16 06:51:54] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-16 06:51:54] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-16 06:51:54] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-16 06:51:54] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-16 06:51:54] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-16 06:51:54] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-16 06:51:54] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-16 06:51:54] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-16 06:51:54] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-16 06:51:54] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-16 06:51:54] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=1 +[2026-02-16 06:51:54] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode +[2026-02-16 06:51:54] [AICB调用] 第4次subprocess调用 
+[2026-02-16 06:52:19] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-16 06:52:19] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=3 +[2026-02-16 06:52:19] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=4 +[2026-02-16 06:52:19] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=5 +[2026-02-16 06:52:19] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=6 +[2026-02-16 06:52:19] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=7 +[2026-02-16 06:52:19] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=8 +[2026-02-16 06:52:19] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode +[2026-02-16 06:52:19] [AICB调用] 第5次subprocess调用 +[2026-02-16 06:55:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=9 +[2026-02-16 06:55:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=10 +[2026-02-16 06:55:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=11 +[2026-02-16 06:55:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=12 +[2026-02-16 06:55:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=13 +[2026-02-16 06:55:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=14 +[2026-02-16 06:55:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=15 +[2026-02-16 06:55:44] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode +[2026-02-16 06:55:44] [AICB调用] 
第6次subprocess调用 +[2026-02-16 06:55:54] 从磁盘加载了 10 条缓存记录 +[2026-02-16 06:55:54] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 06:58:40] 从磁盘加载了 10 条缓存记录 +[2026-02-16 06:58:40] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 06:58:44] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode +[2026-02-16 06:58:44] [AICB调用] 第1次subprocess调用 +[2026-02-16 06:59:30] [AICB调用] 第2次subprocess调用 +[2026-02-16 06:59:39] 从磁盘加载了 10 条缓存记录 +[2026-02-16 06:59:39] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 06:59:43] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode +[2026-02-16 06:59:43] [AICB调用] 第1次subprocess调用 +[2026-02-16 07:05:01] 从磁盘加载了 10 条缓存记录 +[2026-02-16 07:05:01] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 07:05:08] [未命中] key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill +[2026-02-16 07:05:08] [AICB调用] 第1次subprocess调用 +[2026-02-16 07:05:28] [缓存] key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, layers=48 +[2026-02-16 07:05:28] [未命中] key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode +[2026-02-16 07:05:28] [AICB调用] 第2次subprocess调用 +[2026-02-16 07:05:48] [缓存] key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, layers=48 +[2026-02-16 07:05:48] [AICB调用] 第3次subprocess调用 +[2026-02-16 09:06:46] 从磁盘加载了 12 条缓存记录 +[2026-02-16 09:06:46] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 09:07:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, 
phase=prefill, 总命中=1 +[2026-02-16 09:07:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-16 09:07:14] [AICB调用] 第1次subprocess调用 +[2026-02-16 09:07:35] [缓存] key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, layers=48 +[2026-02-16 09:07:35] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-16 09:07:35] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-16 09:07:35] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-16 09:07:35] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-16 09:07:35] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-16 09:07:35] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-16 09:07:35] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-16 09:07:35] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-16 09:07:35] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-16 09:07:35] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 
+[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-02-16 09:07:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-02-16 09:07:35] 统计: hits=27, interp=5, calls=1, entries=18 
+[2026-02-16 09:07:35] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-16 09:07:54] 从磁盘加载了 13 条缓存记录 +[2026-02-16 09:07:54] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 09:07:58] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode +[2026-02-16 09:07:58] [AICB调用] 第1次subprocess调用 +[2026-02-16 09:08:38] [AICB调用] 第2次subprocess调用 +[2026-02-16 09:08:46] 从磁盘加载了 13 条缓存记录 +[2026-02-16 09:08:46] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 09:25:22] 从磁盘加载了 13 条缓存记录 +[2026-02-16 09:25:22] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 09:25:26] [未命中] key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode +[2026-02-16 09:25:26] [AICB调用] 第1次subprocess调用 +[2026-02-16 09:26:17] 从磁盘加载了 13 条缓存记录 +[2026-02-16 09:26:17] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-16 09:26:23] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-16 09:26:23] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-16 09:26:23] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-16 09:26:23] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-16 09:26:23] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-16 09:26:23] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-16 09:26:23] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-16 09:26:23] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-16 09:26:23] 插值: seq=105, 
lower=104, upper=106, alpha=0.5000 +[2026-02-16 09:26:23] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 
总命中=18 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-02-16 09:26:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-02-16 09:26:23] 统计: hits=27, interp=5, calls=0, entries=18 +[2026-02-16 09:26:23] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-16 09:26:24] [缓存] key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, layers=48 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=1 +[2026-02-16 09:26:24] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-16 09:26:24] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-16 09:26:24] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-16 09:26:24] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-16 09:26:24] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-16 09:26:24] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-16 09:26:24] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-16 09:26:24] [插值] 
seq=104, 使用邻居插值, 总插值=4 +[2026-02-16 09:26:24] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-16 09:26:24] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=2 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=3 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=4 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=5 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=6 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=7 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=8 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=9 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=10 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=11 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=12 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=13 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=14 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=15 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=16 +[2026-02-16 09:26:24] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=17 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=18 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=19 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=20 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=21 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=22 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=23 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=24 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=25 +[2026-02-16 09:26:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=26 +[2026-02-16 09:26:24] 统计: hits=26, interp=5, calls=1, entries=19 +[2026-02-16 09:26:24] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-16 09:26:37] 从磁盘加载了 14 条缓存记录 +[2026-02-16 09:26:37] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 09:26:44] [未命中] key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill +[2026-02-16 09:26:44] [AICB调用] 第1次subprocess调用 +[2026-02-16 09:26:53] [缓存] key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, layers=61 +[2026-02-16 09:26:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-02-16 09:26:53] [未命中] 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode +[2026-02-16 09:26:53] [AICB调用] 第2次subprocess调用 +[2026-02-16 09:27:05] [缓存] key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, layers=61 +[2026-02-16 09:27:05] [AICB调用] 第3次subprocess调用 +[2026-02-16 09:27:16] [缓存] key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, layers=61 +[2026-02-16 09:27:16] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-16 09:27:16] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-16 09:27:16] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-16 09:27:16] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-16 09:27:16] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-16 09:27:16] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-16 09:27:16] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-16 09:27:16] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-16 09:27:16] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-16 09:27:16] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=2 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=4 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=5 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=6 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=7 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=8 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=9 +[2026-02-16 
09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=10 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=11 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=12 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=13 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=14 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=15 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=16 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=17 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=18 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=19 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=20 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=21 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=22 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=23 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=24 +[2026-02-16 09:27:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=25 +[2026-02-16 09:27:16] 统计: hits=25, interp=5, calls=3, entries=22 
+[2026-02-16 09:27:16] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-16 09:27:37] 从磁盘加载了 17 条缓存记录 +[2026-02-16 09:27:37] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-16 09:27:44] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-16 09:27:44] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-16 09:27:44] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-16 09:27:44] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-16 09:27:44] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-16 09:27:44] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-16 09:27:44] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-16 09:27:44] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-16 09:27:44] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-16 09:27:44] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 
总命中=9 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, 
pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-02-16 09:27:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-02-16 09:27:44] 统计: hits=27, interp=5, calls=0, entries=22 +[2026-02-16 09:27:44] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-16 09:29:28] 从磁盘加载了 17 条缓存记录 +[2026-02-16 09:29:28] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-16 09:29:32] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-16 09:29:32] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-16 09:29:32] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-16 09:29:32] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-16 09:29:32] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-16 09:29:32] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-16 09:29:32] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-16 09:29:32] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-16 09:29:32] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-16 09:29:32] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, 
phase=decode, 总命中=7 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-02-16 09:29:32] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-02-16 09:29:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-02-16 09:29:32] 统计: hits=27, interp=5, calls=0, entries=22 +[2026-02-16 09:29:32] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-16 09:29:56] 从磁盘加载了 17 条缓存记录 +[2026-02-16 09:29:56] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-16 09:30:00] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-16 09:30:00] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-16 09:30:00] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-16 09:30:00] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-16 09:30:00] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-16 09:30:00] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-16 09:30:00] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-16 09:30:00] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-16 09:30:00] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-16 09:30:00] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, 
tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 
+[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-02-16 09:30:00] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-02-16 09:30:00] 统计: hits=27, interp=5, calls=0, entries=22 +[2026-02-16 09:30:00] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-16 09:30:20] 从磁盘加载了 17 条缓存记录 +[2026-02-16 09:30:20] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-16 09:30:26] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-16 09:30:26] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-16 09:30:26] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-16 09:30:26] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-16 09:30:26] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-16 09:30:26] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-16 09:30:26] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-02-16 09:30:26] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-16 09:30:26] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-16 09:30:26] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-16 09:30:26] [命中] 
精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 
总命中=19 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-02-16 09:30:26] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-02-16 09:30:26] 统计: hits=27, interp=5, calls=0, entries=22 +[2026-02-16 09:30:26] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-02-16 09:30:36] 从磁盘加载了 17 条缓存记录 +[2026-02-16 09:30:36] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-02-16 09:30:40] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-02-16 09:30:40] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-02-16 09:30:40] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-02-16 09:30:40] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-02-16 09:30:40] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-02-16 09:30:40] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-02-16 09:30:40] 插值: seq=104, 
lower=103, upper=106, alpha=0.3333 +[2026-02-16 09:30:40] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-02-16 09:30:40] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-02-16 09:30:40] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, 
seq=104, phase=decode, 总命中=17 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-02-16 09:30:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-02-16 09:30:40] 统计: hits=27, interp=5, calls=0, entries=22 +[2026-02-16 09:30:40] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-12 08:40:46] 从磁盘加载了 17 条缓存记录 +[2026-03-12 08:40:46] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-12 08:40:50] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-12 08:40:50] [插值] seq=101, 使用邻居插值, 总插值=1 
+[2026-03-12 08:40:50] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-12 08:40:50] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-12 08:40:50] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-12 08:40:50] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-12 08:40:50] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-12 08:40:50] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-12 08:40:50] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-12 08:40:50] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-12 08:40:50] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-12 08:40:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-12 08:40:50] 统计: hits=27, interp=5, calls=0, entries=22 +[2026-03-12 08:40:50] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-check-qoder/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-12 09:38:53] 从磁盘加载了 17 条缓存记录 +[2026-03-12 09:38:53] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, 
tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-12 09:38:57] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-12 09:38:57] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-12 09:38:57] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-12 09:38:57] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-12 09:38:57] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-12 09:38:57] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-12 09:38:57] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-12 09:38:57] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-12 09:38:57] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-12 09:38:57] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 
+[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-12 09:38:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-12 09:38:57] 统计: hits=27, interp=5, calls=0, entries=22 +[2026-03-12 09:38:57] 查表索引已保存到 
/disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-12 09:39:05] 从磁盘加载了 17 条缓存记录 +[2026-03-12 09:39:05] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-12 09:39:11] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-12 09:39:11] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-12 09:39:11] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-12 09:39:11] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-12 09:39:11] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-12 09:39:11] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-12 09:39:11] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-12 09:39:11] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-12 09:39:11] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-12 09:39:11] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-12 09:39:11] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, 
phase=decode, 总命中=26 +[2026-03-12 09:39:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-12 09:39:11] 统计: hits=27, interp=5, calls=0, entries=22 +[2026-03-12 09:39:11] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-12 09:40:27] 从磁盘加载了 17 条缓存记录 +[2026-03-12 09:40:27] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-12 09:40:34] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-12 09:40:34] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-12 09:40:34] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-12 09:40:34] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-12 09:40:34] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-12 09:40:34] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-12 09:40:34] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-12 09:40:34] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-12 09:40:34] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-12 09:40:34] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-12 09:40:34] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, 
seq=103, phase=decode, 总命中=24 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-12 09:40:34] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-12 09:40:34] 统计: hits=27, interp=5, calls=0, entries=22 +[2026-03-12 09:40:34] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-12 09:40:51] 从磁盘加载了 17 条缓存记录 +[2026-03-12 09:40:51] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-12 09:40:57] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-12 09:40:57] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-12 09:40:57] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-12 09:40:57] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-12 09:40:57] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-12 09:40:57] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-12 09:40:57] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-12 09:40:57] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-12 09:40:57] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-12 09:40:57] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-12 09:40:57] [命中] 
精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, 
phase=decode, 总命中=22 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-12 09:40:57] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-12 09:40:57] 统计: hits=27, interp=5, calls=0, entries=22 +[2026-03-12 09:40:57] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-12 09:41:15] 从磁盘加载了 17 条缓存记录 +[2026-03-12 09:41:15] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-12 09:41:21] [未命中] key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill +[2026-03-12 09:41:21] [AICB调用] 第1次subprocess调用 +[2026-03-12 09:41:34] [缓存] key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, layers=94 +[2026-03-12 09:41:34] [未命中] key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode +[2026-03-12 09:41:34] [AICB调用] 第2次subprocess调用 +[2026-03-12 09:41:47] [缓存] key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, layers=94 +[2026-03-12 09:41:47] [AICB调用] 第3次subprocess调用 +[2026-03-12 09:42:01] [缓存] key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, layers=94 +[2026-03-12 09:42:01] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-12 09:42:01] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-12 09:42:01] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-12 09:42:01] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-12 09:42:01] 插值: seq=103, lower=102, 
upper=106, alpha=0.2500 +[2026-03-12 09:42:01] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-12 09:42:01] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-12 09:42:01] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-12 09:42:01] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-12 09:42:01] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=1 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=4 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=5 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=6 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=7 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=8 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=9 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=10 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=11 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=12 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=13 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, 
phase=decode, 总命中=14 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=15 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=16 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=17 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=18 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=19 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=20 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=21 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=22 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=23 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=24 +[2026-03-12 09:42:01] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=25 +[2026-03-12 09:42:01] 统计: hits=25, interp=5, calls=3, entries=25 +[2026-03-12 09:42:01] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-13 09:43:49] 从磁盘加载了 20 条缓存记录 +[2026-03-13 09:43:49] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 09:44:23] 从磁盘加载了 20 条缓存记录 +[2026-03-13 09:44:23] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, 
bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-13 09:44:25] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-13 09:44:25] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-13 09:44:25] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-13 09:44:25] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-13 09:44:25] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-13 09:44:25] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-13 09:44:25] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-13 09:44:25] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-13 09:44:25] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-13 09:44:25] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-13 09:44:25] 
[命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-13 09:44:25] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-13 09:44:25] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-13 09:44:25] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json 
+[2026-03-13 09:45:11] 从磁盘加载了 20 条缓存记录 +[2026-03-13 09:45:11] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-13 09:45:14] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-13 09:45:14] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-13 09:45:14] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-13 09:45:14] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-13 09:45:14] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-13 09:45:14] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-13 09:45:14] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-13 09:45:14] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-13 09:45:14] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-13 09:45:14] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 
+[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-13 09:45:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-13 09:45:14] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-13 09:45:14] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-13 09:45:14] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-13 09:50:08] 从磁盘加载了 20 条缓存记录 +[2026-03-13 09:50:08] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-13 09:50:11] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-13 09:50:11] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-13 09:50:11] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-13 09:50:11] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-13 09:50:11] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-13 09:50:11] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-13 09:50:11] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-13 09:50:11] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-13 09:50:11] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-13 09:50:11] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, 
bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-13 
09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-13 09:50:11] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-13 09:50:11] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-13 09:50:11] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-13 09:50:12] 从磁盘加载了 20 条缓存记录 +[2026-03-13 09:50:12] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-13 09:50:17] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-13 09:50:17] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-13 09:50:17] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-13 09:50:17] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-13 09:50:17] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-13 09:50:17] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-13 09:50:17] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-13 09:50:17] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-13 09:50:17] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-13 09:50:17] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, 
tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-13 09:50:17] [命中] 
精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-13 09:50:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-13 09:50:17] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-13 09:50:17] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-13 09:50:19] 从磁盘加载了 20 条缓存记录 +[2026-03-13 09:50:19] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-13 09:50:23] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-13 09:50:23] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-13 09:50:23] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-13 09:50:23] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-13 09:50:23] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-13 09:50:23] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-13 09:50:23] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-13 09:50:23] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-13 09:50:23] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-13 09:50:23] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, 
bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-13 09:50:23] [命中] 
精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-13 09:50:23] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-13 09:50:23] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-13 09:50:23] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-13 09:50:25] 从磁盘加载了 20 条缓存记录 +[2026-03-13 09:50:25] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-13 09:50:30] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-13 09:50:30] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-13 09:50:30] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-13 09:50:30] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-13 09:50:30] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-13 09:50:30] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-13 09:50:30] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-13 09:50:30] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-13 09:50:30] 插值: seq=105, lower=104, upper=106, alpha=0.5000 
+[2026-03-13 09:50:30] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 
+[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-13 09:50:30] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-13 09:50:30] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-13 09:50:30] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-13 10:32:00] 从磁盘加载了 20 条缓存记录 +[2026-03-13 10:32:00] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-13 10:32:02] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-13 10:32:02] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-13 10:32:02] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-13 10:32:02] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-13 10:32:02] 插值: seq=103, lower=102, 
upper=106, alpha=0.2500 +[2026-03-13 10:32:02] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-13 10:32:02] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-13 10:32:02] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-13 10:32:02] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-13 10:32:02] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, 
phase=decode, 总命中=16 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-13 10:32:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-13 10:32:02] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-13 10:32:02] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-13 10:34:05] 从磁盘加载了 20 条缓存记录 +[2026-03-13 10:34:05] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 12:40:25] 从磁盘加载了 20 条缓存记录 +[2026-03-13 12:40:25] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, 
seq=0, phase=decode, 总命中=1 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-13 12:40:27] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-13 12:40:27] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-13 12:40:27] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-13 12:40:27] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-13 12:40:27] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-13 12:40:27] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-13 12:40:27] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-13 12:40:27] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-13 12:40:27] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-13 12:40:27] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-13 12:40:27] [命中] 
精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-13 12:40:27] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-13 12:40:27] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-13 12:40:27] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json 
+[2026-03-13 12:40:28] 从磁盘加载了 20 条缓存记录 +[2026-03-13 12:40:28] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-13 12:40:33] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-13 12:40:33] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-13 12:40:33] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-13 12:40:33] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-13 12:40:33] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-13 12:40:33] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-13 12:40:33] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-13 12:40:33] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-13 12:40:33] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-13 12:40:33] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-13 
12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-13 12:40:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, 
seq=106, phase=decode, 总命中=27 +[2026-03-13 12:40:33] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-13 12:40:33] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-13 12:40:35] 从磁盘加载了 20 条缓存记录 +[2026-03-13 12:40:35] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-13 12:40:40] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-13 12:40:40] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-13 12:40:40] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-13 12:40:40] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-13 12:40:40] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-13 12:40:40] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-13 12:40:40] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-13 12:40:40] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-13 12:40:40] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-13 12:40:40] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-13 12:40:40] 
[命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, 
bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-13 12:40:40] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-13 12:40:40] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-13 12:40:40] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-13 12:40:41] 从磁盘加载了 20 条缓存记录 +[2026-03-13 12:40:41] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-13 12:40:46] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-13 12:40:46] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-13 12:40:46] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-13 12:40:46] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-13 12:40:46] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-13 12:40:46] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-13 12:40:46] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-13 12:40:46] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-13 12:40:46] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-13 12:40:46] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-13 
12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, 
ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-13 12:40:46] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-13 12:40:46] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-13 12:40:46] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:26:51] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:26:51] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:26:53] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:26:53] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:26:53] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:26:53] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:26:53] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:26:53] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:26:53] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:26:53] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:26:53] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:26:53] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, 
phase=decode, 总命中=4 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 01:26:53] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:26:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:26:53] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:26:53] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:26:55] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:26:55] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:26:59] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:26:59] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:26:59] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:26:59] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:26:59] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:26:59] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:26:59] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:26:59] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:26:59] 插值: seq=105, lower=104, upper=106, alpha=0.5000 
+[2026-03-14 01:26:59] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:26:59] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:26:59] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:26:59] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:26:59] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:27:01] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:27:01] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 01:27:06] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:27:06] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:27:06] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:27:06] 
[插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:27:06] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:27:06] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:27:06] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:27:06] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:27:06] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:27:06] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:27:06] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:27:06] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:27:06] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:27:06] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:27:07] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:27:07] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:27:12] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 
01:27:12] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:27:12] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:27:12] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:27:12] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:27:12] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:27:12] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:27:12] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:27:12] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:27:12] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 
01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:27:12] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:27:12] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:27:12] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:30:32] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:30:32] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:30:35] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:30:35] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:30:35] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:30:35] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:30:35] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:30:35] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:30:35] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:30:35] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:30:35] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:30:35] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:30:35] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, 
seq=0, phase=decode, 总命中=12 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:30:35] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:30:35] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:30:35] 查表索引已保存到 
/disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:30:44] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:30:44] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:30:47] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:30:47] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:30:47] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:30:47] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:30:47] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:30:47] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:30:47] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:30:47] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:30:47] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:30:47] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:30:47] [命中] 
精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, 
ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:30:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:30:47] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:30:47] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:30:48] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:30:48] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:30:53] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:30:53] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:30:53] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:30:53] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:30:53] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:30:53] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:30:53] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:30:53] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:30:53] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:30:53] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 
01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, 
seq=103, phase=decode, 总命中=24 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:30:53] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:30:53] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:30:53] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:30:55] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:30:55] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 01:30:59] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:30:59] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:30:59] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:30:59] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:30:59] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:30:59] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:30:59] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:30:59] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:30:59] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:30:59] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:30:59] 
[命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, 
bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:30:59] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:30:59] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:30:59] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:31:01] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:31:01] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:31:06] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:31:06] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:31:06] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:31:06] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:31:06] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:31:06] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:31:06] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:31:06] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:31:06] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:31:06] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 
01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, 
tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:31:06] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:31:06] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:31:06] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:36:36] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:36:36] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:36:38] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:36:38] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:36:38] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:36:38] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:36:38] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:36:38] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:36:38] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:36:38] [插值] seq=104, 
使用邻居插值, 总插值=4 +[2026-03-14 01:36:38] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:36:38] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:36:38] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:36:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:36:38] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:36:38] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:36:40] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:36:40] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:36:45] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:36:45] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:36:45] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 
01:36:45] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:36:45] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:36:45] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:36:45] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:36:45] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:36:45] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:36:45] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:36:45] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:36:45] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:36:45] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:36:45] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:36:46] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:36:46] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, 
phase=prefill, 总命中=2 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 01:36:51] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:36:51] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:36:51] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:36:51] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:36:51] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:36:51] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:36:51] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:36:51] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:36:51] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:36:51] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:36:51] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:36:51] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:36:51] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:36:51] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:36:53] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:36:53] AICBGlobalCache 初始化完成, 缓存目录: 
/disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:36:58] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:36:58] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:36:58] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:36:58] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:36:58] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:36:58] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:36:58] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:36:58] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:36:58] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:36:58] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, 
seq=106, phase=decode, 总命中=11 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:36:58] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 
01:36:58] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:36:58] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:41:54] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:41:54] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:41:56] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:41:56] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:41:56] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:41:56] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:41:56] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:41:56] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:41:56] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:41:56] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:41:56] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:41:56] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, 
tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 
总命中=25 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:41:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:41:56] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:41:56] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:41:58] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:41:58] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:42:03] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:42:03] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:42:03] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:42:03] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:42:03] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:42:03] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:42:03] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:42:03] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:42:03] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:42:03] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:42:03] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 
总命中=23 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:42:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:42:03] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:42:03] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:42:04] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:42:04] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 01:42:09] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:42:09] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:42:09] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:42:09] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:42:09] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:42:09] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:42:09] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:42:09] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:42:09] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:42:09] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 01:42:09] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, 
phase=decode, 总命中=21 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:42:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:42:09] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:42:09] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 01:42:11] 从磁盘加载了 20 条缓存记录 +[2026-03-14 01:42:11] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 01:42:16] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 01:42:16] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 01:42:16] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 01:42:16] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 01:42:16] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 01:42:16] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 01:42:16] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 01:42:16] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 01:42:16] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 01:42:16] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 01:42:16] [命中] 
精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, 
ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 01:42:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 01:42:16] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 01:42:16] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:06:35] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:06:35] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:06:37] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:06:37] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:06:37] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:06:37] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:06:37] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:06:37] [插值] seq=103, 使用邻居插值, 总插值=3 
+[2026-03-14 03:06:37] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:06:37] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:06:37] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:06:37] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:06:37] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:06:37] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:06:37] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:06:37] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:06:39] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:06:39] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:06:44] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 
03:06:44] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:06:44] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:06:44] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:06:44] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:06:44] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:06:44] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:06:44] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:06:44] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:06:44] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:06:44] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:06:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:06:44] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:06:44] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:06:45] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:06:45] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:06:53] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:06:53] AICBGlobalCache 初始化完成, 缓存目录: 
/disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:06:56] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:06:56] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:06:56] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:06:56] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:06:56] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:06:56] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:06:56] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:06:56] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:06:56] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:06:56] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, 
seq=106, phase=decode, 总命中=11 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:06:56] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 
03:06:56] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:06:56] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:06:57] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:06:57] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:07:02] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:07:02] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:07:02] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:07:02] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:07:02] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:07:02] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:07:02] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:07:02] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:07:02] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:07:02] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, 
ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:07:02] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:07:02] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:07:02] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:07:02] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:07:04] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:07:04] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 03:07:09] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:07:09] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:07:09] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:07:09] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:07:09] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:07:09] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:07:09] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:07:09] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:07:09] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:07:09] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, 
seq=102, phase=decode, 总命中=7 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:07:09] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:07:09] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:07:09] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:07:09] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:07:11] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:07:11] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:07:11] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:07:11] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:07:13] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:07:13] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:07:13] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:07:13] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:07:13] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:07:13] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:07:13] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:07:13] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:07:13] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:07:13] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:07:13] [命中] 
精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, 
bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:07:13] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:07:13] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:07:13] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:07:15] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:07:15] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:07:16] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:07:16] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:07:16] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:07:16] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:07:16] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:07:16] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:07:16] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:07:16] [插值] seq=104, 使用邻居插值, 总插值=4 
+[2026-03-14 03:07:16] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:07:16] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:07:16] [命中] 精确匹配: 
key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:07:16] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:07:16] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:07:16] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:07:20] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:07:20] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:07:20] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:07:20] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:07:20] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:07:20] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 
03:07:20] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:07:20] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:07:20] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:07:20] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, 
seq=104, phase=decode, 总命中=17 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:07:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:07:20] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:07:20] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:07:21] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:07:21] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 03:07:26] 插值: seq=101, 
lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:07:26] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:07:26] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:07:26] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:07:26] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:07:26] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:07:26] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:07:26] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:07:26] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:07:26] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, 
phase=decode, 总命中=15 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:07:26] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:07:26] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:07:26] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:07:28] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:07:28] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:07:33] [命中] 精确匹配: 
key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:07:33] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:07:33] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:07:33] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:07:33] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:07:33] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:07:33] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:07:33] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:07:33] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:07:33] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:07:33] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, 
seq=100, phase=decode, 总命中=13 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:07:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:07:33] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:07:33] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:16:15] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:16:15] 
AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:16:17] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:16:17] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:16:17] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:16:17] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:16:17] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:16:17] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:16:17] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:16:17] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:16:17] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:16:17] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, 
tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:16:17] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 
总命中=27 +[2026-03-14 03:16:17] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:16:17] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:16:19] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:16:19] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:16:23] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:16:23] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:16:23] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:16:23] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:16:23] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:16:23] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:16:23] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:16:23] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:16:23] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:16:23] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:16:23] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 
总命中=25 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:16:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:16:23] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:16:23] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:16:25] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:16:25] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 03:16:30] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:16:30] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:16:30] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:16:30] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:16:30] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:16:30] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:16:30] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:16:30] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:16:30] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:16:30] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:16:30] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, 
seq=102, phase=decode, 总命中=23 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:16:30] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:16:30] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:16:30] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:16:32] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:16:32] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:16:36] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:16:36] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:16:36] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:16:36] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:16:36] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:16:36] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:16:36] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:16:36] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:16:36] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:16:36] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:16:36] 
[命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, 
pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:16:36] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:16:36] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:16:36] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:31:35] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:31:35] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:31:38] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:31:38] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:31:38] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:31:38] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:31:38] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:31:38] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:31:38] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:31:38] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:31:38] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:31:38] [插值] seq=105, 使用邻居插值, 
总插值=5 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:31:38] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:31:38] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:31:38] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:31:38] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:31:39] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:31:39] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:31:44] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:31:44] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:31:44] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:31:44] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:31:44] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 
03:31:44] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:31:44] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:31:44] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:31:44] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:31:44] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:31:44] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:31:44] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:31:44] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:31:45] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:31:45] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, 
phase=decode, 总命中=3 +[2026-03-14 03:31:50] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:31:50] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:31:50] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:31:50] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:31:50] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:31:50] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:31:50] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:31:50] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:31:50] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:31:50] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:31:50] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:31:50] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:31:50] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:31:50] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:31:52] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:31:52] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, 
bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:31:57] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:31:57] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:31:57] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:31:57] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:31:57] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:31:57] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:31:57] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:31:57] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:31:57] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:31:57] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 
03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:31:57] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:31:57] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:31:57] 查表索引已保存到 
/disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:56:12] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:56:12] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:56:14] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:56:14] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:56:14] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:56:14] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:56:14] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:56:14] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:56:14] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:56:14] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:56:14] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:56:14] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:56:14] [命中] 
精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, 
ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:56:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:56:14] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:56:14] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:56:16] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:56:16] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:56:21] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:56:21] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:56:21] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:56:21] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:56:21] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:56:21] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:56:21] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:56:21] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:56:21] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:56:21] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 
03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, 
seq=103, phase=decode, 总命中=24 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:56:21] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:56:21] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:56:21] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:56:22] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:56:22] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 03:56:27] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:56:27] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:56:27] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:56:27] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:56:27] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:56:27] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:56:27] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:56:27] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:56:27] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:56:27] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:56:27] 
[命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, 
bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:56:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:56:27] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:56:27] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:56:29] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:56:29] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:56:31] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:56:31] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:56:33] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:56:33] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:56:33] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:56:33] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:56:33] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:56:33] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:56:33] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:56:33] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:56:33] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 
03:56:33] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 
03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:56:33] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:56:33] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:56:33] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:56:34] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:56:34] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:56:34] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:56:34] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:56:34] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:56:34] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:56:34] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:56:34] [插值] seq=104, 
使用邻居插值, 总插值=4 +[2026-03-14 03:56:34] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:56:34] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:56:34] [命中] 精确匹配: 
key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:56:34] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:56:34] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:56:34] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:56:35] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:56:35] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:56:40] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:56:40] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:56:40] 插值: seq=102, lower=101, upper=106, alpha=0.2000 
+[2026-03-14 03:56:40] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:56:40] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:56:40] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:56:40] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:56:40] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:56:40] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:56:40] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:56:40] [命中] 
精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:56:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:56:40] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:56:40] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:56:41] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:56:41] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:56:46] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:56:46] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, 
seq=100, phase=prefill, 总命中=2 +[2026-03-14 03:56:46] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 03:56:46] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:56:46] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:56:46] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:56:46] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:56:46] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:56:46] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:56:46] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:56:46] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:56:46] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:56:46] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:56:46] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 03:56:46] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:56:46] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:56:46] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:56:46] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:56:46] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:56:47] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:56:47] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 03:56:47] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:56:47] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 03:56:48] 从磁盘加载了 20 条缓存记录 +[2026-03-14 03:56:48] AICBGlobalCache 初始化完成, 缓存目录: 
/disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 03:56:53] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 03:56:53] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 03:56:53] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 03:56:53] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 03:56:53] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 03:56:53] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 03:56:53] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 03:56:53] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 03:56:53] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 03:56:53] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, 
seq=106, phase=decode, 总命中=11 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 03:56:53] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 
03:56:53] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 03:56:53] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:06:22] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:06:22] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:06:24] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:06:24] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:06:24] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:06:24] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:06:24] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:06:24] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:06:24] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:06:24] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:06:24] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:06:24] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, 
tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 
总命中=25 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:06:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:06:24] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:06:24] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:06:26] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:06:26] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:06:31] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:06:31] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:06:31] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:06:31] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:06:31] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:06:31] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:06:31] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:06:31] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:06:31] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:06:31] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:06:31] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 
总命中=23 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:06:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:06:31] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:06:31] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:06:32] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:06:32] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 04:06:37] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:06:37] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:06:37] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:06:37] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:06:37] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:06:37] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:06:37] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:06:37] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:06:37] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:06:37] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 04:06:37] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, 
phase=decode, 总命中=21 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:06:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:06:37] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:06:37] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:06:38] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:06:38] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:06:39] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:06:39] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:06:41] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:06:41] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:06:41] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:06:41] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:06:41] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:06:41] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:06:41] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:06:41] [插值] 
seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:06:41] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:06:41] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:06:41] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:06:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:06:41] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:06:41] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:06:42] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:06:42] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:06:44] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:06:44] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:06:44] 插值: seq=102, lower=101, upper=106, alpha=0.2000 
+[2026-03-14 04:06:44] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:06:44] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:06:44] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:06:44] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:06:44] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:06:44] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:06:44] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 
+[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:06:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:06:44] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:06:44] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:06:47] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:06:47] [插值] 
seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:06:47] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:06:47] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:06:47] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:06:47] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:06:47] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:06:47] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:06:47] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:06:47] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:06:47] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:06:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:06:47] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:06:47] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:06:49] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:06:49] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, 
phase=prefill, 总命中=1 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 04:06:54] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:06:54] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:06:54] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:06:54] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:06:54] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:06:54] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:06:54] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:06:54] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:06:54] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:06:54] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:06:54] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:06:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:06:54] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:06:54] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:06:55] 
从磁盘加载了 20 条缓存记录 +[2026-03-14 04:06:55] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:07:00] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:07:00] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:07:00] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:07:00] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:07:00] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:07:00] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:07:00] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:07:00] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:07:00] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:07:00] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:07:00] [命中] 
精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:07:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, 
pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:07:00] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:07:00] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:09:38] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:09:38] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:09:40] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:09:40] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:09:40] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:09:40] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:09:40] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:09:40] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:09:40] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:09:40] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:09:40] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:09:40] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 
+[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:09:40] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:09:40] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:09:40] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:09:40] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:09:42] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:09:42] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:09:47] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:09:47] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:09:47] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:09:47] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:09:47] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:09:47] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:09:47] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:09:47] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:09:47] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:09:47] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, 
seq=101, phase=decode, 总命中=6 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:09:47] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:09:47] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:09:47] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:09:47] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:09:48] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:09:48] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 04:09:53] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:09:53] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:09:53] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:09:53] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:09:53] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:09:53] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:09:53] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:09:53] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:09:53] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:09:53] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, 
seq=106, phase=decode, 总命中=4 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:09:53] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:09:53] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:09:53] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:09:53] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:09:55] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:09:55] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:10:00] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:10:00] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:10:00] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:10:00] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:10:00] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:10:00] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:10:00] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:10:00] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:10:00] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 
04:10:00] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 
04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:10:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:10:00] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:10:00] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:12:13] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:12:13] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:12:16] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:12:16] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:12:16] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:12:16] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:12:16] 插值: seq=103, lower=102, upper=106, 
alpha=0.2500 +[2026-03-14 04:12:16] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:12:16] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:12:16] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:12:16] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:12:16] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 
总命中=16 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:12:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:12:16] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:12:16] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:13:03] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:13:03] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:13:05] 插值: seq=101, 
lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:13:05] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:13:05] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:13:05] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:13:05] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:13:05] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:13:05] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:13:05] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:13:05] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:13:05] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, 
seq=101, phase=decode, 总命中=14 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:13:05] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:13:05] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:13:05] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:13:18] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:13:18] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 
04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:13:20] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:13:20] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:13:20] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:13:20] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:13:20] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:13:20] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:13:20] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:13:20] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:13:20] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:13:20] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, 
pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:13:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:13:20] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:13:20] 查表索引已保存到 
/disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:17:00] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:17:00] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:17:03] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:17:03] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:17:03] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:17:03] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:17:03] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:17:03] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:17:03] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:17:03] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:17:03] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:17:03] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:17:03] [命中] 
精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, 
ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:17:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:17:03] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:17:03] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:17:04] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:17:04] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:17:09] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:17:09] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:17:09] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:17:09] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:17:09] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:17:09] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:17:09] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:17:09] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:17:09] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:17:09] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 
04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, 
seq=103, phase=decode, 总命中=24 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:17:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:17:09] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:17:09] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:17:11] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:17:11] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 04:17:16] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:17:16] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:17:16] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:17:16] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:17:16] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:17:16] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:17:16] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:17:16] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:17:16] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:17:16] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:17:16] 
[命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, 
bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:17:16] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:17:16] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:17:16] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:17:17] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:17:17] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:17:22] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:17:22] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:17:22] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:17:22] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:17:22] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:17:22] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:17:22] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:17:22] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:17:22] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:17:22] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 
04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, 
tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:17:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:17:22] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:17:22] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:58:05] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:58:05] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:58:08] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:58:08] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:58:08] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:58:08] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:58:08] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:58:08] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:58:08] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:58:08] [插值] seq=104, 
使用邻居插值, 总插值=4 +[2026-03-14 04:58:08] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:58:08] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:58:08] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:58:08] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:58:08] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:58:08] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:58:09] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:58:09] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:58:14] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:58:14] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:58:14] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 
04:58:14] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:58:14] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:58:14] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:58:14] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:58:14] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:58:14] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:58:14] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:58:14] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:58:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:58:14] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:58:14] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:58:16] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:58:16] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, 
phase=prefill, 总命中=2 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 04:58:21] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:58:21] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:58:21] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:58:21] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:58:21] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:58:21] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:58:21] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:58:21] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:58:21] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:58:21] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:58:21] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:58:21] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 04:58:21] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:58:21] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 04:58:22] 从磁盘加载了 20 条缓存记录 +[2026-03-14 04:58:22] AICBGlobalCache 初始化完成, 缓存目录: 
/disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 04:58:27] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 04:58:27] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 04:58:27] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 04:58:27] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 04:58:27] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 04:58:27] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 04:58:27] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 04:58:27] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 04:58:27] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 04:58:27] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, 
seq=106, phase=decode, 总命中=11 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 04:58:27] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 
04:58:27] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 04:58:27] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 08:00:12] 从磁盘加载了 20 条缓存记录 +[2026-03-14 08:00:12] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 08:00:16] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 08:00:16] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 08:00:16] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 08:00:16] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 08:00:16] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 08:00:16] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 08:00:16] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 08:00:16] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 08:00:16] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 08:00:16] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, 
tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 
总命中=25 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 08:00:16] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 08:00:16] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 08:00:16] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 08:00:25] 从磁盘加载了 20 条缓存记录 +[2026-03-14 08:00:25] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 08:00:32] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 08:00:32] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 08:00:32] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 08:00:32] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 08:00:32] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 08:00:32] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 08:00:32] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 08:00:32] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 08:00:32] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 08:00:32] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 08:00:32] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 
总命中=23 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 08:00:32] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 08:00:32] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 08:00:32] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 08:01:20] 从磁盘加载了 20 条缓存记录 +[2026-03-14 08:01:20] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 08:01:23] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 08:01:23] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 08:01:23] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 08:01:23] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 08:01:23] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 08:01:23] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 08:01:23] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 08:01:23] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 08:01:23] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 08:01:23] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 08:01:23] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, 
bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 08:01:23] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 08:01:23] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 08:01:23] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 08:01:24] 从磁盘加载了 20 条缓存记录 +[2026-03-14 08:01:24] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 08:01:29] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 08:01:29] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 08:01:29] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 08:01:29] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 08:01:29] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 08:01:29] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 08:01:29] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 08:01:29] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 08:01:29] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 08:01:29] [插值] seq=105, 使用邻居插值, 总插值=5 
+[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, 
bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 08:01:29] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 08:01:29] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 08:01:29] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 08:01:31] 从磁盘加载了 20 条缓存记录 +[2026-03-14 08:01:31] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 08:01:36] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 08:01:36] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 08:01:36] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 08:01:36] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 08:01:36] 
插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 08:01:36] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 08:01:36] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 08:01:36] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 08:01:36] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 08:01:36] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, 
bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 08:01:36] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 08:01:36] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 08:01:36] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 08:01:37] 从磁盘加载了 20 条缓存记录 +[2026-03-14 08:01:37] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 08:01:42] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 08:01:42] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 
08:01:42] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 08:01:42] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 08:01:42] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 08:01:42] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 08:01:42] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 08:01:42] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 08:01:42] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 08:01:42] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, 
tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 08:01:42] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 08:01:42] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 08:01:42] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:13:22] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:13:22] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, 
phase=decode, 总命中=1 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:13:24] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:13:24] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:13:24] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:13:24] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:13:24] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:13:24] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:13:24] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:13:24] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:13:24] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:13:24] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 15:13:24] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:13:24] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:13:24] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:13:24] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json 
+[2026-03-14 15:13:26] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:13:26] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:13:31] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:13:31] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:13:31] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:13:31] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:13:31] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:13:31] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:13:31] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:13:31] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:13:31] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:13:31] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 
15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:13:31] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, 
seq=106, phase=decode, 总命中=27 +[2026-03-14 15:13:31] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:13:31] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:13:32] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:13:32] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 15:13:37] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:13:37] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:13:37] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:13:37] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:13:37] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:13:37] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:13:37] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:13:37] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:13:37] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:13:37] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:13:37] 
[命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, 
bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:13:37] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:13:37] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:13:37] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:13:39] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:13:39] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:13:44] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:13:44] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:13:44] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:13:44] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:13:44] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:13:44] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:13:44] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:13:44] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:13:44] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:13:44] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 
15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, 
ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:13:44] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:13:44] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:13:44] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:21:11] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:21:11] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:21:14] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:21:14] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:21:14] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:21:14] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:21:14] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:21:14] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:21:14] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:21:14] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:21:14] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:21:14] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, 
phase=decode, 总命中=4 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 15:21:14] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:21:14] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:21:14] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:21:14] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:21:15] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:21:15] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:21:20] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:21:20] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:21:20] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:21:20] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:21:20] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:21:20] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:21:20] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:21:20] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:21:20] 插值: seq=105, lower=104, upper=106, alpha=0.5000 
+[2026-03-14 15:21:20] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:21:20] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:21:20] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:21:20] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:21:20] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:21:22] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:21:22] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 15:21:27] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:21:27] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:21:27] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:21:27] 
[插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:21:27] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:21:27] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:21:27] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:21:27] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:21:27] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:21:27] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:21:27] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:21:27] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:21:27] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:21:27] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:21:28] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:21:28] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:21:33] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 
15:21:33] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:21:33] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:21:33] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:21:33] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:21:33] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:21:33] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:21:33] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:21:33] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:21:33] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 
15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:21:33] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:21:33] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:21:33] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:25:41] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:25:41] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:25:44] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:25:44] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:25:44] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:25:44] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:25:44] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:25:44] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:25:44] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:25:44] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:25:44] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:25:44] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:25:44] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, 
seq=0, phase=decode, 总命中=12 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:25:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:25:44] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:25:44] 查表索引已保存到 
/disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:25:45] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:25:45] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:25:50] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:25:50] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:25:50] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:25:50] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:25:50] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:25:50] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:25:50] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:25:50] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:25:50] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:25:50] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:25:50] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, 
phase=decode, 总命中=26 +[2026-03-14 15:25:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:25:50] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:25:50] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:25:52] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:25:52] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 15:25:57] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:25:57] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:25:57] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:25:57] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:25:57] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:25:57] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:25:57] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:25:57] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:25:57] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:25:57] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:25:57] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, 
seq=103, phase=decode, 总命中=24 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:25:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:25:57] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:25:57] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:25:58] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:25:58] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:26:03] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:26:03] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:26:03] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:26:03] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:26:03] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:26:03] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:26:03] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:26:03] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:26:03] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:26:03] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:26:03] 
[命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, 
pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:26:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:26:03] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:26:03] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:31:41] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:31:41] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:31:44] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:31:44] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:31:44] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:31:44] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:31:44] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:31:44] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:31:44] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:31:44] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:31:44] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:31:44] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 
总命中=3 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:31:44] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:31:44] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:31:44] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:31:44] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:31:45] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:31:45] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:31:50] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:31:50] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:31:50] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:31:50] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:31:50] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:31:50] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:31:50] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 
15:31:50] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:31:50] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:31:50] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:31:50] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:31:50] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:31:50] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:31:50] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:31:52] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:31:52] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 15:31:57] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:31:57] [插值] 
seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:31:57] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:31:57] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:31:57] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:31:57] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:31:57] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:31:57] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:31:57] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:31:57] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:31:57] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:31:57] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:31:57] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:31:57] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:31:59] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:31:59] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, 
bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:32:03] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:32:03] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:32:03] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:32:03] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:32:03] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:32:03] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:32:03] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:32:03] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:32:03] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:32:03] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 
15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:32:03] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:32:03] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:32:03] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:32:39] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:32:39] AICBGlobalCache 初始化完成, 缓存目录: 
/disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:32:41] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:32:41] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:32:41] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:32:41] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:32:41] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:32:41] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:32:41] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:32:41] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:32:41] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:32:41] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, 
seq=106, phase=decode, 总命中=11 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:32:41] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 
15:32:41] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:32:41] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:32:43] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:32:43] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:32:48] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:32:48] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:32:48] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:32:48] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:32:48] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:32:48] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:32:48] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:32:48] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:32:48] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:32:48] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, 
ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:32:48] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:32:48] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:32:48] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:32:48] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:32:49] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:32:49] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-14 15:32:54] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:32:54] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:32:54] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:32:54] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:32:54] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:32:54] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:32:54] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:32:54] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:32:54] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:32:54] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, 
seq=102, phase=decode, 总命中=7 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:32:54] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:32:54] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:32:54] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:32:54] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-14 15:32:56] 从磁盘加载了 20 条缓存记录 +[2026-03-14 15:32:56] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-14 15:33:00] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-14 15:33:00] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-14 15:33:00] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-14 15:33:00] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-14 15:33:00] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-14 15:33:00] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-14 15:33:00] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-14 15:33:00] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-14 15:33:00] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-14 15:33:00] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, 
bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-14 
15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-14 15:33:00] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-14 15:33:00] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-14 15:33:00] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-15 02:03:04] 从磁盘加载了 20 条缓存记录 +[2026-03-15 02:03:04] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-15 02:03:06] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-15 02:03:06] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-15 02:03:06] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-15 02:03:06] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-15 02:03:06] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-15 02:03:06] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-15 02:03:06] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-15 02:03:06] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-15 02:03:06] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-15 02:03:06] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, 
ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 
总命中=19 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-15 02:03:06] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-15 02:03:06] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-15 02:03:06] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-15 02:03:08] 从磁盘加载了 20 条缓存记录 +[2026-03-15 02:03:08] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-15 02:03:12] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-15 02:03:12] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-15 02:03:12] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-15 02:03:12] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-15 02:03:12] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-15 02:03:12] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-15 02:03:12] 插值: seq=104, 
lower=103, upper=106, alpha=0.3333 +[2026-03-15 02:03:12] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-15 02:03:12] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-15 02:03:12] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 
总命中=17 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-15 02:03:12] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-15 02:03:12] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-15 02:03:12] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-15 02:03:14] 从磁盘加载了 20 条缓存记录 +[2026-03-15 02:03:14] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-15 02:03:19] 插值: seq=101, lower=100, upper=106, 
alpha=0.1667 +[2026-03-15 02:03:19] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-15 02:03:19] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-15 02:03:19] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-15 02:03:19] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-15 02:03:19] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-15 02:03:19] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-15 02:03:19] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-15 02:03:19] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-15 02:03:19] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 
+[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-15 02:03:19] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-15 02:03:19] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-15 02:03:19] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-15 02:03:20] 从磁盘加载了 20 条缓存记录 +[2026-03-15 02:03:20] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-15 02:03:25] [命中] 精确匹配: 
key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-15 02:03:25] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-15 02:03:25] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-15 02:03:25] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-15 02:03:25] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-15 02:03:25] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-15 02:03:25] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-15 02:03:25] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-15 02:03:25] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-15 02:03:25] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-15 02:03:25] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, 
seq=100, phase=decode, 总命中=13 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-15 02:03:25] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-15 02:03:25] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-15 02:03:25] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-16 11:07:00] 从磁盘加载了 20 条缓存记录 +[2026-03-16 11:07:00] 
AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=1 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-16 11:07:03] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-16 11:07:03] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-16 11:07:03] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-16 11:07:03] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-16 11:07:03] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-16 11:07:03] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-16 11:07:03] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-16 11:07:03] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-16 11:07:03] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-16 11:07:03] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=4 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, 
tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=12 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode, 总命中=20 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-16 11:07:03] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode, 
总命中=27 +[2026-03-16 11:07:03] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-16 11:07:03] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-16 11:07:04] 从磁盘加载了 20 条缓存记录 +[2026-03-16 11:07:04] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-16 11:07:09] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-16 11:07:09] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-16 11:07:09] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-16 11:07:09] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-16 11:07:09] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-16 11:07:09] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-16 11:07:09] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-16 11:07:09] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-16 11:07:09] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-16 11:07:09] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-16 11:07:09] [命中] 精确匹配: 
key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=104, phase=decode, 
总命中=25 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-16 11:07:09] [命中] 精确匹配: key=model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-16 11:07:09] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-16 11:07:09] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-16 11:07:10] 从磁盘加载了 20 条缓存记录 +[2026-03-16 11:07:10] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=2 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=3 +[2026-03-16 11:07:15] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-16 11:07:15] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-16 11:07:15] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-16 11:07:15] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-16 11:07:15] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-16 11:07:15] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-16 11:07:15] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-16 11:07:15] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-16 11:07:15] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-16 11:07:15] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=4 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-16 11:07:15] [命中] 精确匹配: 
key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, 
seq=102, phase=decode, 总命中=23 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-16 11:07:15] [命中] 精确匹配: key=model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-16 11:07:15] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-16 11:07:15] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json +[2026-03-16 11:07:17] 从磁盘加载了 20 条缓存记录 +[2026-03-16 11:07:17] AICBGlobalCache 初始化完成, 缓存目录: /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/cache +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=1 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=2 +[2026-03-16 11:07:22] 插值: seq=101, lower=100, upper=106, alpha=0.1667 +[2026-03-16 11:07:22] [插值] seq=101, 使用邻居插值, 总插值=1 +[2026-03-16 11:07:22] 插值: seq=102, lower=101, upper=106, alpha=0.2000 +[2026-03-16 11:07:22] [插值] seq=102, 使用邻居插值, 总插值=2 +[2026-03-16 11:07:22] 插值: seq=103, lower=102, upper=106, alpha=0.2500 +[2026-03-16 11:07:22] [插值] seq=103, 使用邻居插值, 总插值=3 +[2026-03-16 11:07:22] 插值: seq=104, lower=103, upper=106, alpha=0.3333 +[2026-03-16 11:07:22] [插值] seq=104, 使用邻居插值, 总插值=4 +[2026-03-16 11:07:22] 插值: seq=105, lower=104, upper=106, alpha=0.5000 +[2026-03-16 11:07:22] [插值] seq=105, 使用邻居插值, 总插值=5 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=3 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=4 +[2026-03-16 11:07:22] 
[命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=5 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=6 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=7 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=8 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=9 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=10 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=11 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=12 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=13 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=14 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=15 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=16 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=17 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=18 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=19 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill, 总命中=20 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, 
pp=1, ep=24, bs=1, seq=100, phase=decode, 总命中=21 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode, 总命中=22 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode, 总命中=23 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode, 总命中=24 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode, 总命中=25 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode, 总命中=26 +[2026-03-16 11:07:22] [命中] 精确匹配: key=model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode, 总命中=27 +[2026-03-16 11:07:22] 统计: hits=27, interp=5, calls=0, entries=25 +[2026-03-16 11:07:22] 查表索引已保存到 /disk1/futianhao/software02/simai-mem-pr/vidur-alibabacloud/data/aicb_workload/lookup_table.json diff --git a/vidur-alibabacloud/data/aicb_workload/lookup_table.json b/vidur-alibabacloud/data/aicb_workload/lookup_table.json new file mode 100644 index 00000000..1cfea1c2 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/lookup_table.json @@ -0,0 +1,1898 @@ +{ + "model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=100, phase=decode": { + "num_layers": 48, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ] + }, + "model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=100, phase=decode": { + "num_layers": 61, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, 
+ 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60 + ] + }, + "model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=0, seq=0, phase=decode": { + "num_layers": 48, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ] + }, + "model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=prefill": { + "num_layers": 94, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93 + ] + }, + "model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=100, phase=decode": { + "num_layers": 94, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93 + ] + }, + "model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=decode": { + "num_layers": 
48, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ] + }, + "model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=106, phase=decode": { + "num_layers": 48, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ] + }, + "model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=100, phase=decode": { + "num_layers": 61, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60 + ] + }, + "model=Qwen3-Moe-235B, ws=32, tp=4, pp=1, ep=4, bs=1, seq=106, phase=decode": { + "num_layers": 94, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93 + ] + }, + "model=Qwen3-Next-80B, ws=8, tp=1, pp=1, ep=8, bs=1, seq=100, phase=prefill": { + "num_layers": 48, + "layer_ids": [ + 0, + 
1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ] + }, + "model=DeepSeek-671B, ws=16, tp=8, pp=1, ep=16, bs=1, seq=100, phase=prefill": { + "num_layers": 61, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60 + ] + }, + "model=DeepSeek-671B, ws=48, tp=8, pp=1, ep=48, bs=1, seq=106, phase=decode": { + "num_layers": 61, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60 + ] + }, + "model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=100, phase=decode": { + "num_layers": 48, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ] + }, + "model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=100, phase=decode": { + "num_layers": 94, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 
39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93 + ] + }, + "model=DeepSeek-671B, ws=64, tp=8, pp=1, ep=8, bs=1, seq=106, phase=decode": { + "num_layers": 61, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60 + ] + }, + "model=Qwen3-Moe-235B, ws=8, tp=4, pp=1, ep=8, bs=1, seq=100, phase=prefill": { + "num_layers": 94, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93 + ] + }, + "model=Qwen3-Next-80B, ws=2, tp=1, pp=1, ep=2, bs=1, seq=100, phase=prefill": { + "num_layers": 48, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ] + }, + "model=Qwen3-Next-80B, ws=6, tp=1, pp=1, ep=6, bs=1, seq=106, phase=decode": { + 
"num_layers": 48, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ] + }, + "model=Qwen3-Next-80B, ws=32, tp=1, pp=1, ep=32, bs=1, seq=106, phase=decode": { + "num_layers": 48, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ] + }, + "model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=106, phase=decode": { + "num_layers": 94, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93 + ] + }, + "model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=101, phase=decode": { + "num_layers": 94, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, 
+ 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93 + ] + }, + "model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=102, phase=decode": { + "num_layers": 94, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93 + ] + }, + "model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=103, phase=decode": { + "num_layers": 94, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93 + ] + }, + "model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=104, phase=decode": { + "num_layers": 94, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 
72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93 + ] + }, + "model=Qwen3-Moe-235B, ws=24, tp=4, pp=1, ep=24, bs=1, seq=105, phase=decode": { + "num_layers": 94, + "layer_ids": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93 + ] + } +} \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/vidur-DeepSeek-671B-world_size32-tp1-pp1-ep32-bs3-seq4096-decode.csv b/vidur-alibabacloud/data/aicb_workload/vidur-DeepSeek-671B-world_size32-tp1-pp1-ep32-bs3-seq4096-decode.csv new file mode 100644 index 00000000..a67272cf --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/vidur-DeepSeek-671B-world_size32-tp1-pp1-ep32-bs3-seq4096-decode.csv @@ -0,0 +1,4 @@ +layer_name,comp_time,comm_size +attention,179376,0 +mlp,30314,0 +moe,208498,521472 \ No newline at end of file diff --git a/vidur-alibabacloud/data/aicb_workload/vidur-DeepSeek-671B-world_size32-tp1-pp1-ep32-bs4-seq4096-decode.csv b/vidur-alibabacloud/data/aicb_workload/vidur-DeepSeek-671B-world_size32-tp1-pp1-ep32-bs4-seq4096-decode.csv new file mode 100644 index 00000000..aacb3d41 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/vidur-DeepSeek-671B-world_size32-tp1-pp1-ep32-bs4-seq4096-decode.csv @@ -0,0 +1,123 @@ +layer_id layer_name comp_time comm_size +0 attention 189453 0 +0 mlp 30574 0 +1 attention 189453 0 +1 mlp 30574 0 +2 attention 189453 0 +2 mlp 30574 0 +3 attention 189453 0 +3 moe 239215 695296 +4 
attention 189453 0 +4 moe 239215 695296 +5 attention 189453 0 +5 moe 239215 695296 +6 attention 189453 0 +6 moe 239215 695296 +7 attention 189453 0 +7 moe 239215 695296 +8 attention 189453 0 +8 moe 239215 695296 +9 attention 189453 0 +9 moe 239215 695296 +10 attention 189453 0 +10 moe 239215 695296 +11 attention 189453 0 +11 moe 239215 695296 +12 attention 189453 0 +12 moe 239215 695296 +13 attention 189453 0 +13 moe 239215 695296 +14 attention 189453 0 +14 moe 239215 695296 +15 attention 189453 0 +15 moe 239215 695296 +16 attention 189453 0 +16 moe 239215 695296 +17 attention 189453 0 +17 moe 239215 695296 +18 attention 189453 0 +18 moe 239215 695296 +19 attention 189453 0 +19 moe 239215 695296 +20 attention 189453 0 +20 moe 239215 695296 +21 attention 189453 0 +21 moe 239215 695296 +22 attention 189453 0 +22 moe 239215 695296 +23 attention 189453 0 +23 moe 239215 695296 +24 attention 189453 0 +24 moe 239215 695296 +25 attention 189453 0 +25 moe 239215 695296 +26 attention 189453 0 +26 moe 239215 695296 +27 attention 189453 0 +27 moe 239215 695296 +28 attention 189453 0 +28 moe 239215 695296 +29 attention 189453 0 +29 moe 239215 695296 +30 attention 189453 0 +30 moe 239215 695296 +31 attention 189453 0 +31 moe 239215 695296 +32 attention 189453 0 +32 moe 239215 695296 +33 attention 189453 0 +33 moe 239215 695296 +34 attention 189453 0 +34 moe 239215 695296 +35 attention 189453 0 +35 moe 239215 695296 +36 attention 189453 0 +36 moe 239215 695296 +37 attention 189453 0 +37 moe 239215 695296 +38 attention 189453 0 +38 moe 239215 695296 +39 attention 189453 0 +39 moe 239215 695296 +40 attention 189453 0 +40 moe 239215 695296 +41 attention 189453 0 +41 moe 239215 695296 +42 attention 189453 0 +42 moe 239215 695296 +43 attention 189453 0 +43 moe 239215 695296 +44 attention 189453 0 +44 moe 239215 695296 +45 attention 189453 0 +45 moe 239215 695296 +46 attention 189453 0 +46 moe 239215 695296 +47 attention 189453 0 +47 moe 239215 695296 +48 attention 189453 0 +48 moe 
239215 695296 +49 attention 189453 0 +49 moe 239215 695296 +50 attention 189453 0 +50 moe 239215 695296 +51 attention 189453 0 +51 moe 239215 695296 +52 attention 189453 0 +52 moe 239215 695296 +53 attention 189453 0 +53 moe 239215 695296 +54 attention 189453 0 +54 moe 239215 695296 +55 attention 189453 0 +55 moe 239215 695296 +56 attention 189453 0 +56 moe 239215 695296 +57 attention 189453 0 +57 moe 239215 695296 +58 attention 189453 0 +58 moe 239215 695296 +59 attention 189453 0 +59 moe 239215 695296 +60 attention 189453 0 +60 moe 239215 695296 diff --git a/vidur-alibabacloud/data/aicb_workload/vidur-DeepSeek-671B-world_size32-tp1-pp1-ep32-bs4-seq4096-prefill.csv b/vidur-alibabacloud/data/aicb_workload/vidur-DeepSeek-671B-world_size32-tp1-pp1-ep32-bs4-seq4096-prefill.csv new file mode 100644 index 00000000..e8066847 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/vidur-DeepSeek-671B-world_size32-tp1-pp1-ep32-bs4-seq4096-prefill.csv @@ -0,0 +1,123 @@ +layer_id layer_name comp_time comm_size +0 attention 23627076 0 +0 mlp 1371115 0 +1 attention 23627076 0 +1 mlp 1371115 0 +2 attention 23627076 0 +2 mlp 1371115 0 +3 attention 23627076 0 +3 moe 13102115 711983104 +4 attention 23627076 0 +4 moe 13102115 711983104 +5 attention 23627076 0 +5 moe 13102115 711983104 +6 attention 23627076 0 +6 moe 13102115 711983104 +7 attention 23627076 0 +7 moe 13102115 711983104 +8 attention 23627076 0 +8 moe 13102115 711983104 +9 attention 23627076 0 +9 moe 13102115 711983104 +10 attention 23627076 0 +10 moe 13102115 711983104 +11 attention 23627076 0 +11 moe 13102115 711983104 +12 attention 23627076 0 +12 moe 13102115 711983104 +13 attention 23627076 0 +13 moe 13102115 711983104 +14 attention 23627076 0 +14 moe 13102115 711983104 +15 attention 23627076 0 +15 moe 13102115 711983104 +16 attention 23627076 0 +16 moe 13102115 711983104 +17 attention 23627076 0 +17 moe 13102115 711983104 +18 attention 23627076 0 +18 moe 13102115 711983104 +19 attention 23627076 0 +19 moe 
13102115 711983104 +20 attention 23627076 0 +20 moe 13102115 711983104 +21 attention 23627076 0 +21 moe 13102115 711983104 +22 attention 23627076 0 +22 moe 13102115 711983104 +23 attention 23627076 0 +23 moe 13102115 711983104 +24 attention 23627076 0 +24 moe 13102115 711983104 +25 attention 23627076 0 +25 moe 13102115 711983104 +26 attention 23627076 0 +26 moe 13102115 711983104 +27 attention 23627076 0 +27 moe 13102115 711983104 +28 attention 23627076 0 +28 moe 13102115 711983104 +29 attention 23627076 0 +29 moe 13102115 711983104 +30 attention 23627076 0 +30 moe 13102115 711983104 +31 attention 23627076 0 +31 moe 13102115 711983104 +32 attention 23627076 0 +32 moe 13102115 711983104 +33 attention 23627076 0 +33 moe 13102115 711983104 +34 attention 23627076 0 +34 moe 13102115 711983104 +35 attention 23627076 0 +35 moe 13102115 711983104 +36 attention 23627076 0 +36 moe 13102115 711983104 +37 attention 23627076 0 +37 moe 13102115 711983104 +38 attention 23627076 0 +38 moe 13102115 711983104 +39 attention 23627076 0 +39 moe 13102115 711983104 +40 attention 23627076 0 +40 moe 13102115 711983104 +41 attention 23627076 0 +41 moe 13102115 711983104 +42 attention 23627076 0 +42 moe 13102115 711983104 +43 attention 23627076 0 +43 moe 13102115 711983104 +44 attention 23627076 0 +44 moe 13102115 711983104 +45 attention 23627076 0 +45 moe 13102115 711983104 +46 attention 23627076 0 +46 moe 13102115 711983104 +47 attention 23627076 0 +47 moe 13102115 711983104 +48 attention 23627076 0 +48 moe 13102115 711983104 +49 attention 23627076 0 +49 moe 13102115 711983104 +50 attention 23627076 0 +50 moe 13102115 711983104 +51 attention 23627076 0 +51 moe 13102115 711983104 +52 attention 23627076 0 +52 moe 13102115 711983104 +53 attention 23627076 0 +53 moe 13102115 711983104 +54 attention 23627076 0 +54 moe 13102115 711983104 +55 attention 23627076 0 +55 moe 13102115 711983104 +56 attention 23627076 0 +56 moe 13102115 711983104 +57 attention 23627076 0 +57 moe 13102115 711983104 +58 
attention 23627076 0 +58 moe 13102115 711983104 +59 attention 23627076 0 +59 moe 13102115 711983104 +60 attention 23627076 0 +60 moe 13102115 711983104 diff --git a/vidur-alibabacloud/data/aicb_workload/vidur-Qwen3-Moe-235B-world_size32-tp1-pp1-ep32-bs4-seq4096-decode.csv b/vidur-alibabacloud/data/aicb_workload/vidur-Qwen3-Moe-235B-world_size32-tp1-pp1-ep32-bs4-seq4096-decode.csv new file mode 100644 index 00000000..b8cd66cb --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/vidur-Qwen3-Moe-235B-world_size32-tp1-pp1-ep32-bs4-seq4096-decode.csv @@ -0,0 +1,189 @@ +layer_id layer_name comp_time comm_size +0 attention 85414 0 +0 moe 80703 397312 +1 attention 85414 0 +1 moe 80703 397312 +2 attention 85414 0 +2 moe 80703 397312 +3 attention 85414 0 +3 moe 80703 397312 +4 attention 85414 0 +4 moe 80703 397312 +5 attention 85414 0 +5 moe 80703 397312 +6 attention 85414 0 +6 moe 80703 397312 +7 attention 85414 0 +7 moe 80703 397312 +8 attention 85414 0 +8 moe 80703 397312 +9 attention 85414 0 +9 moe 80703 397312 +10 attention 85414 0 +10 moe 80703 397312 +11 attention 85414 0 +11 moe 80703 397312 +12 attention 85414 0 +12 moe 80703 397312 +13 attention 85414 0 +13 moe 80703 397312 +14 attention 85414 0 +14 moe 80703 397312 +15 attention 85414 0 +15 moe 80703 397312 +16 attention 85414 0 +16 moe 80703 397312 +17 attention 85414 0 +17 moe 80703 397312 +18 attention 85414 0 +18 moe 80703 397312 +19 attention 85414 0 +19 moe 80703 397312 +20 attention 85414 0 +20 moe 80703 397312 +21 attention 85414 0 +21 moe 80703 397312 +22 attention 85414 0 +22 moe 80703 397312 +23 attention 85414 0 +23 moe 80703 397312 +24 attention 85414 0 +24 moe 80703 397312 +25 attention 85414 0 +25 moe 80703 397312 +26 attention 85414 0 +26 moe 80703 397312 +27 attention 85414 0 +27 moe 80703 397312 +28 attention 85414 0 +28 moe 80703 397312 +29 attention 85414 0 +29 moe 80703 397312 +30 attention 85414 0 +30 moe 80703 397312 +31 attention 85414 0 +31 moe 80703 397312 +32 attention 85414 0 +32 
moe 80703 397312 +33 attention 85414 0 +33 moe 80703 397312 +34 attention 85414 0 +34 moe 80703 397312 +35 attention 85414 0 +35 moe 80703 397312 +36 attention 85414 0 +36 moe 80703 397312 +37 attention 85414 0 +37 moe 80703 397312 +38 attention 85414 0 +38 moe 80703 397312 +39 attention 85414 0 +39 moe 80703 397312 +40 attention 85414 0 +40 moe 80703 397312 +41 attention 85414 0 +41 moe 80703 397312 +42 attention 85414 0 +42 moe 80703 397312 +43 attention 85414 0 +43 moe 80703 397312 +44 attention 85414 0 +44 moe 80703 397312 +45 attention 85414 0 +45 moe 80703 397312 +46 attention 85414 0 +46 moe 80703 397312 +47 attention 85414 0 +47 moe 80703 397312 +48 attention 85414 0 +48 moe 80703 397312 +49 attention 85414 0 +49 moe 80703 397312 +50 attention 85414 0 +50 moe 80703 397312 +51 attention 85414 0 +51 moe 80703 397312 +52 attention 85414 0 +52 moe 80703 397312 +53 attention 85414 0 +53 moe 80703 397312 +54 attention 85414 0 +54 moe 80703 397312 +55 attention 85414 0 +55 moe 80703 397312 +56 attention 85414 0 +56 moe 80703 397312 +57 attention 85414 0 +57 moe 80703 397312 +58 attention 85414 0 +58 moe 80703 397312 +59 attention 85414 0 +59 moe 80703 397312 +60 attention 85414 0 +60 moe 80703 397312 +61 attention 85414 0 +61 moe 80703 397312 +62 attention 85414 0 +62 moe 80703 397312 +63 attention 85414 0 +63 moe 80703 397312 +64 attention 85414 0 +64 moe 80703 397312 +65 attention 85414 0 +65 moe 80703 397312 +66 attention 85414 0 +66 moe 80703 397312 +67 attention 85414 0 +67 moe 80703 397312 +68 attention 85414 0 +68 moe 80703 397312 +69 attention 85414 0 +69 moe 80703 397312 +70 attention 85414 0 +70 moe 80703 397312 +71 attention 85414 0 +71 moe 80703 397312 +72 attention 85414 0 +72 moe 80703 397312 +73 attention 85414 0 +73 moe 80703 397312 +74 attention 85414 0 +74 moe 80703 397312 +75 attention 85414 0 +75 moe 80703 397312 +76 attention 85414 0 +76 moe 80703 397312 +77 attention 85414 0 +77 moe 80703 397312 +78 attention 85414 0 +78 moe 80703 397312 +79 
attention 85414 0 +79 moe 80703 397312 +80 attention 85414 0 +80 moe 80703 397312 +81 attention 85414 0 +81 moe 80703 397312 +82 attention 85414 0 +82 moe 80703 397312 +83 attention 85414 0 +83 moe 80703 397312 +84 attention 85414 0 +84 moe 80703 397312 +85 attention 85414 0 +85 moe 80703 397312 +86 attention 85414 0 +86 moe 80703 397312 +87 attention 85414 0 +87 moe 80703 397312 +88 attention 85414 0 +88 moe 80703 397312 +89 attention 85414 0 +89 moe 80703 397312 +90 attention 85414 0 +90 moe 80703 397312 +91 attention 85414 0 +91 moe 80703 397312 +92 attention 85414 0 +92 moe 80703 397312 +93 attention 85414 0 +93 moe 80703 397312 diff --git a/vidur-alibabacloud/data/aicb_workload/vidur-Qwen3-Moe-235B-world_size32-tp1-pp1-ep32-bs4-seq4096-prefill.csv b/vidur-alibabacloud/data/aicb_workload/vidur-Qwen3-Moe-235B-world_size32-tp1-pp1-ep32-bs4-seq4096-prefill.csv new file mode 100644 index 00000000..4e6aaecc --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/vidur-Qwen3-Moe-235B-world_size32-tp1-pp1-ep32-bs4-seq4096-prefill.csv @@ -0,0 +1,189 @@ +layer_id layer_name comp_time comm_size +0 attention 2761041 0 +0 moe 4919649 406847488 +1 attention 2761041 0 +1 moe 4919649 406847488 +2 attention 2761041 0 +2 moe 4919649 406847488 +3 attention 2761041 0 +3 moe 4919649 406847488 +4 attention 2761041 0 +4 moe 4919649 406847488 +5 attention 2761041 0 +5 moe 4919649 406847488 +6 attention 2761041 0 +6 moe 4919649 406847488 +7 attention 2761041 0 +7 moe 4919649 406847488 +8 attention 2761041 0 +8 moe 4919649 406847488 +9 attention 2761041 0 +9 moe 4919649 406847488 +10 attention 2761041 0 +10 moe 4919649 406847488 +11 attention 2761041 0 +11 moe 4919649 406847488 +12 attention 2761041 0 +12 moe 4919649 406847488 +13 attention 2761041 0 +13 moe 4919649 406847488 +14 attention 2761041 0 +14 moe 4919649 406847488 +15 attention 2761041 0 +15 moe 4919649 406847488 +16 attention 2761041 0 +16 moe 4919649 406847488 +17 attention 2761041 0 +17 moe 4919649 406847488 +18 
attention 2761041 0 +18 moe 4919649 406847488 +19 attention 2761041 0 +19 moe 4919649 406847488 +20 attention 2761041 0 +20 moe 4919649 406847488 +21 attention 2761041 0 +21 moe 4919649 406847488 +22 attention 2761041 0 +22 moe 4919649 406847488 +23 attention 2761041 0 +23 moe 4919649 406847488 +24 attention 2761041 0 +24 moe 4919649 406847488 +25 attention 2761041 0 +25 moe 4919649 406847488 +26 attention 2761041 0 +26 moe 4919649 406847488 +27 attention 2761041 0 +27 moe 4919649 406847488 +28 attention 2761041 0 +28 moe 4919649 406847488 +29 attention 2761041 0 +29 moe 4919649 406847488 +30 attention 2761041 0 +30 moe 4919649 406847488 +31 attention 2761041 0 +31 moe 4919649 406847488 +32 attention 2761041 0 +32 moe 4919649 406847488 +33 attention 2761041 0 +33 moe 4919649 406847488 +34 attention 2761041 0 +34 moe 4919649 406847488 +35 attention 2761041 0 +35 moe 4919649 406847488 +36 attention 2761041 0 +36 moe 4919649 406847488 +37 attention 2761041 0 +37 moe 4919649 406847488 +38 attention 2761041 0 +38 moe 4919649 406847488 +39 attention 2761041 0 +39 moe 4919649 406847488 +40 attention 2761041 0 +40 moe 4919649 406847488 +41 attention 2761041 0 +41 moe 4919649 406847488 +42 attention 2761041 0 +42 moe 4919649 406847488 +43 attention 2761041 0 +43 moe 4919649 406847488 +44 attention 2761041 0 +44 moe 4919649 406847488 +45 attention 2761041 0 +45 moe 4919649 406847488 +46 attention 2761041 0 +46 moe 4919649 406847488 +47 attention 2761041 0 +47 moe 4919649 406847488 +48 attention 2761041 0 +48 moe 4919649 406847488 +49 attention 2761041 0 +49 moe 4919649 406847488 +50 attention 2761041 0 +50 moe 4919649 406847488 +51 attention 2761041 0 +51 moe 4919649 406847488 +52 attention 2761041 0 +52 moe 4919649 406847488 +53 attention 2761041 0 +53 moe 4919649 406847488 +54 attention 2761041 0 +54 moe 4919649 406847488 +55 attention 2761041 0 +55 moe 4919649 406847488 +56 attention 2761041 0 +56 moe 4919649 406847488 +57 attention 2761041 0 +57 moe 4919649 406847488 +58 
attention 2761041 0 +58 moe 4919649 406847488 +59 attention 2761041 0 +59 moe 4919649 406847488 +60 attention 2761041 0 +60 moe 4919649 406847488 +61 attention 2761041 0 +61 moe 4919649 406847488 +62 attention 2761041 0 +62 moe 4919649 406847488 +63 attention 2761041 0 +63 moe 4919649 406847488 +64 attention 2761041 0 +64 moe 4919649 406847488 +65 attention 2761041 0 +65 moe 4919649 406847488 +66 attention 2761041 0 +66 moe 4919649 406847488 +67 attention 2761041 0 +67 moe 4919649 406847488 +68 attention 2761041 0 +68 moe 4919649 406847488 +69 attention 2761041 0 +69 moe 4919649 406847488 +70 attention 2761041 0 +70 moe 4919649 406847488 +71 attention 2761041 0 +71 moe 4919649 406847488 +72 attention 2761041 0 +72 moe 4919649 406847488 +73 attention 2761041 0 +73 moe 4919649 406847488 +74 attention 2761041 0 +74 moe 4919649 406847488 +75 attention 2761041 0 +75 moe 4919649 406847488 +76 attention 2761041 0 +76 moe 4919649 406847488 +77 attention 2761041 0 +77 moe 4919649 406847488 +78 attention 2761041 0 +78 moe 4919649 406847488 +79 attention 2761041 0 +79 moe 4919649 406847488 +80 attention 2761041 0 +80 moe 4919649 406847488 +81 attention 2761041 0 +81 moe 4919649 406847488 +82 attention 2761041 0 +82 moe 4919649 406847488 +83 attention 2761041 0 +83 moe 4919649 406847488 +84 attention 2761041 0 +84 moe 4919649 406847488 +85 attention 2761041 0 +85 moe 4919649 406847488 +86 attention 2761041 0 +86 moe 4919649 406847488 +87 attention 2761041 0 +87 moe 4919649 406847488 +88 attention 2761041 0 +88 moe 4919649 406847488 +89 attention 2761041 0 +89 moe 4919649 406847488 +90 attention 2761041 0 +90 moe 4919649 406847488 +91 attention 2761041 0 +91 moe 4919649 406847488 +92 attention 2761041 0 +92 moe 4919649 406847488 +93 attention 2761041 0 +93 moe 4919649 406847488 diff --git a/vidur-alibabacloud/data/aicb_workload/vidur-Qwen3-Next-80B-world_size32-tp1-pp1-ep32-bs4-seq4096-decode.csv 
b/vidur-alibabacloud/data/aicb_workload/vidur-Qwen3-Next-80B-world_size32-tp1-pp1-ep32-bs4-seq4096-decode.csv new file mode 100644 index 00000000..3de7f249 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/vidur-Qwen3-Next-80B-world_size32-tp1-pp1-ep32-bs4-seq4096-decode.csv @@ -0,0 +1,97 @@ +layer_id layer_name comp_time comm_size +0 attention 39424 0 +0 moe 91644 248320 +1 attention 39424 0 +1 moe 91644 248320 +2 attention 39424 0 +2 moe 91644 248320 +3 attention 60355 0 +3 moe 91644 248320 +4 attention 39424 0 +4 moe 91644 248320 +5 attention 39424 0 +5 moe 91644 248320 +6 attention 39424 0 +6 moe 91644 248320 +7 attention 60355 0 +7 moe 91644 248320 +8 attention 39424 0 +8 moe 91644 248320 +9 attention 39424 0 +9 moe 91644 248320 +10 attention 39424 0 +10 moe 91644 248320 +11 attention 60355 0 +11 moe 91644 248320 +12 attention 39424 0 +12 moe 91644 248320 +13 attention 39424 0 +13 moe 91644 248320 +14 attention 39424 0 +14 moe 91644 248320 +15 attention 60355 0 +15 moe 91644 248320 +16 attention 39424 0 +16 moe 91644 248320 +17 attention 39424 0 +17 moe 91644 248320 +18 attention 39424 0 +18 moe 91644 248320 +19 attention 60355 0 +19 moe 91644 248320 +20 attention 39424 0 +20 moe 91644 248320 +21 attention 39424 0 +21 moe 91644 248320 +22 attention 39424 0 +22 moe 91644 248320 +23 attention 60355 0 +23 moe 91644 248320 +24 attention 39424 0 +24 moe 91644 248320 +25 attention 39424 0 +25 moe 91644 248320 +26 attention 39424 0 +26 moe 91644 248320 +27 attention 60355 0 +27 moe 91644 248320 +28 attention 39424 0 +28 moe 91644 248320 +29 attention 39424 0 +29 moe 91644 248320 +30 attention 39424 0 +30 moe 91644 248320 +31 attention 60355 0 +31 moe 91644 248320 +32 attention 39424 0 +32 moe 91644 248320 +33 attention 39424 0 +33 moe 91644 248320 +34 attention 39424 0 +34 moe 91644 248320 +35 attention 60355 0 +35 moe 91644 248320 +36 attention 39424 0 +36 moe 91644 248320 +37 attention 39424 0 +37 moe 91644 248320 +38 attention 39424 0 +38 moe 91644 248320 
+39 attention 60355 0 +39 moe 91644 248320 +40 attention 39424 0 +40 moe 91644 248320 +41 attention 39424 0 +41 moe 91644 248320 +42 attention 39424 0 +42 moe 91644 248320 +43 attention 60355 0 +43 moe 91644 248320 +44 attention 39424 0 +44 moe 91644 248320 +45 attention 39424 0 +45 moe 91644 248320 +46 attention 39424 0 +46 moe 91644 248320 +47 attention 60355 0 +47 moe 91644 248320 diff --git a/vidur-alibabacloud/data/aicb_workload/vidur-Qwen3-Next-80B-world_size32-tp1-pp1-ep32-bs4-seq4096-prefill.csv b/vidur-alibabacloud/data/aicb_workload/vidur-Qwen3-Next-80B-world_size32-tp1-pp1-ep32-bs4-seq4096-prefill.csv new file mode 100644 index 00000000..654fbb81 --- /dev/null +++ b/vidur-alibabacloud/data/aicb_workload/vidur-Qwen3-Next-80B-world_size32-tp1-pp1-ep32-bs4-seq4096-prefill.csv @@ -0,0 +1,97 @@ +layer_id layer_name comp_time comm_size +0 attention 932883 0 +0 moe 1306253 254279680 +1 attention 932883 0 +1 moe 1306253 254279680 +2 attention 932883 0 +2 moe 1306253 254279680 +3 attention 883416 0 +3 moe 1306253 254279680 +4 attention 932883 0 +4 moe 1306253 254279680 +5 attention 932883 0 +5 moe 1306253 254279680 +6 attention 932883 0 +6 moe 1306253 254279680 +7 attention 883416 0 +7 moe 1306253 254279680 +8 attention 932883 0 +8 moe 1306253 254279680 +9 attention 932883 0 +9 moe 1306253 254279680 +10 attention 932883 0 +10 moe 1306253 254279680 +11 attention 883416 0 +11 moe 1306253 254279680 +12 attention 932883 0 +12 moe 1306253 254279680 +13 attention 932883 0 +13 moe 1306253 254279680 +14 attention 932883 0 +14 moe 1306253 254279680 +15 attention 883416 0 +15 moe 1306253 254279680 +16 attention 932883 0 +16 moe 1306253 254279680 +17 attention 932883 0 +17 moe 1306253 254279680 +18 attention 932883 0 +18 moe 1306253 254279680 +19 attention 883416 0 +19 moe 1306253 254279680 +20 attention 932883 0 +20 moe 1306253 254279680 +21 attention 932883 0 +21 moe 1306253 254279680 +22 attention 932883 0 +22 moe 1306253 254279680 +23 attention 883416 0 +23 moe 1306253 
254279680 +24 attention 932883 0 +24 moe 1306253 254279680 +25 attention 932883 0 +25 moe 1306253 254279680 +26 attention 932883 0 +26 moe 1306253 254279680 +27 attention 883416 0 +27 moe 1306253 254279680 +28 attention 932883 0 +28 moe 1306253 254279680 +29 attention 932883 0 +29 moe 1306253 254279680 +30 attention 932883 0 +30 moe 1306253 254279680 +31 attention 883416 0 +31 moe 1306253 254279680 +32 attention 932883 0 +32 moe 1306253 254279680 +33 attention 932883 0 +33 moe 1306253 254279680 +34 attention 932883 0 +34 moe 1306253 254279680 +35 attention 883416 0 +35 moe 1306253 254279680 +36 attention 932883 0 +36 moe 1306253 254279680 +37 attention 932883 0 +37 moe 1306253 254279680 +38 attention 932883 0 +38 moe 1306253 254279680 +39 attention 883416 0 +39 moe 1306253 254279680 +40 attention 932883 0 +40 moe 1306253 254279680 +41 attention 932883 0 +41 moe 1306253 254279680 +42 attention 932883 0 +42 moe 1306253 254279680 +43 attention 883416 0 +43 moe 1306253 254279680 +44 attention 932883 0 +44 moe 1306253 254279680 +45 attention 932883 0 +45 moe 1306253 254279680 +46 attention 932883 0 +46 moe 1306253 254279680 +47 attention 883416 0 +47 moe 1306253 254279680 diff --git a/vidur-alibabacloud/data/hf_configs/deepseek_R1_0528_config.json b/vidur-alibabacloud/data/hf_configs/deepseek_R1_0528_config.json new file mode 100644 index 00000000..c482e777 --- /dev/null +++ b/vidur-alibabacloud/data/hf_configs/deepseek_R1_0528_config.json @@ -0,0 +1,67 @@ +{ + "architectures": [ + "DeepseekV3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_deepseek.DeepseekV3Config", + "AutoModel": "modeling_deepseek.DeepseekV3Model", + "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM" + }, + "bos_token_id": 0, + "eos_token_id": 1, + "ep_size": 1, + "first_k_dense_replace": 3, + "hidden_act": "silu", + "hidden_size": 7168, + "initializer_range": 0.02, + "intermediate_size": 18432, + "kv_lora_rank": 
512, + "max_position_embeddings": 163840, + "model_type": "deepseek_v3", + "moe_intermediate_size": 2048, + "moe_layer_freq": 1, + "n_group": 8, + "n_routed_experts": 256, + "n_shared_experts": 1, + "norm_topk_prob": true, + "num_attention_heads": 128, + "num_experts_per_tok": 8, + "num_hidden_layers": 61, + "num_key_value_heads": 128, + "num_nextn_predict_layers": 1, + "q_lora_rank": 1536, + "qk_nope_head_dim": 128, + "qk_rope_head_dim": 64, + "quantization_config": { + "activation_scheme": "dynamic", + "fmt": "e4m3", + "quant_method": "fp8", + "weight_block_size": [ + 128, + 128 + ] + }, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "beta_fast": 32, + "beta_slow": 1, + "factor": 40, + "mscale": 1.0, + "mscale_all_dim": 1.0, + "original_max_position_embeddings": 4096, + "type": "yarn" + }, + "rope_theta": 10000, + "routed_scaling_factor": 2.5, + "scoring_func": "sigmoid", + "tie_word_embeddings": false, + "topk_group": 4, + "topk_method": "noaux_tc", + "torch_dtype": "bfloat16", + "transformers_version": "4.46.3", + "use_cache": true, + "v_head_dim": 128, + "vocab_size": 129280 +} diff --git a/vidur-alibabacloud/data/hf_configs/deepseek_v3_config.json b/vidur-alibabacloud/data/hf_configs/deepseek_v3_config.json new file mode 100644 index 00000000..0ef21d65 --- /dev/null +++ b/vidur-alibabacloud/data/hf_configs/deepseek_v3_config.json @@ -0,0 +1,70 @@ +{ + "architectures": [ + "DeepseekV3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_deepseek.DeepseekV3Config", + "AutoModel": "modeling_deepseek.DeepseekV3Model", + "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM" + }, + "aux_loss_alpha": 0.001, + "bos_token_id": 0, + "eos_token_id": 1, + "ep_size": 1, + "first_k_dense_replace": 3, + "hidden_act": "silu", + "hidden_size": 7168, + "initializer_range": 0.02, + "intermediate_size": 18432, + "kv_lora_rank": 512, + "max_position_embeddings": 163840, + "model_type": "deepseek_v3", + 
"moe_intermediate_size": 2048, + "moe_layer_freq": 1, + "n_group": 8, + "num_routed_experts": 256, + "num_shared_experts": 1, + "norm_topk_prob": true, + "num_attention_heads": 128, + "num_experts_per_tok": 8, + "num_hidden_layers": 61, + "num_key_value_heads": 128, + "num_nextn_predict_layers": 1, + "pretraining_tp": 1, + "q_lora_rank": 1536, + "qk_nope_head_dim": 128, + "qk_rope_head_dim": 64, + "quantization_config": { + "activation_scheme": "dynamic", + "fmt": "e4m3", + "quant_method": "fp8", + "weight_block_size": [ + 128, + 128 + ] + }, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "beta_fast": 32, + "beta_slow": 1, + "factor": 40, + "mscale": 1.0, + "mscale_all_dim": 1.0, + "original_max_position_embeddings": 4096, + "type": "yarn" + }, + "rope_theta": 10000, + "routed_scaling_factor": 2.5, + "scoring_func": "sigmoid", + "seq_aux": true, + "tie_word_embeddings": false, + "topk_group": 4, + "topk_method": "noaux_tc", + "torch_dtype": "bfloat16", + "transformers_version": "4.46.3", + "use_cache": true, + "v_head_dim": 128, + "vocab_size": 129280 +} diff --git a/vidur-alibabacloud/data/hf_configs/qwen3-235B-A22B_FP8_config.json b/vidur-alibabacloud/data/hf_configs/qwen3-235B-A22B_FP8_config.json new file mode 100644 index 00000000..5ae34ce5 --- /dev/null +++ b/vidur-alibabacloud/data/hf_configs/qwen3-235B-A22B_FP8_config.json @@ -0,0 +1,49 @@ +{ + "architectures": [ + "Qwen2MoeForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "decoder_sparse_step": 1, + "eos_token_id": 151643, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 16384, + "max_position_embeddings": 65536, + "max_window_layers": 94, + "mlp_only_layers": [], + "model_type": "qwen2_moe", + "moe_intermediate_size": 1536, + "norm_topk_prob": true, + "num_attention_heads": 64, + "num_experts": 128, + "num_experts_per_tok": 8, + "num_hidden_layers": 94, + "num_key_value_heads": 4, + "output_router_logits": false, 
+ "qkv_bias": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "shared_expert_intermediate_size": 0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.49.0.dev0", + "use_cache": true, + "use_qk_norm": true, + "use_sliding_window": false, + "vocab_size": 151936, + "quantization_config": { + "activation_scheme": "dynamic", + "fmt": "e4m3", + "quant_method": "fp8", + "weight_block_size": [ + 128, + 128 + ] + } +} diff --git a/vidur-alibabacloud/data/hf_configs/qwen3-235B-A22B_config.json b/vidur-alibabacloud/data/hf_configs/qwen3-235B-A22B_config.json new file mode 100644 index 00000000..bb4e2521 --- /dev/null +++ b/vidur-alibabacloud/data/hf_configs/qwen3-235B-A22B_config.json @@ -0,0 +1,38 @@ +{ + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 151643, + "decoder_sparse_step": 1, + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "max_position_embeddings": 262144, + "max_window_layers": 94, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 1536, + "norm_topk_prob": true, + "num_attention_heads": 64, + "num_experts": 128, + "num_experts_per_tok": 8, + "num_hidden_layers": 94, + "num_key_value_heads": 4, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 5000000, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/vidur-alibabacloud/data/hf_configs/qwen3-30B-A3B_config.json b/vidur-alibabacloud/data/hf_configs/qwen3-30B-A3B_config.json new file mode 100644 index 00000000..29951baa --- /dev/null 
+++ b/vidur-alibabacloud/data/hf_configs/qwen3-30B-A3B_config.json @@ -0,0 +1,38 @@ +{ + "architectures": [ + "Qwen3MoeForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 151643, + "decoder_sparse_step": 1, + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 6144, + "max_position_embeddings": 40960, + "max_window_layers": 48, + "mlp_only_layers": [], + "model_type": "qwen3_moe", + "moe_intermediate_size": 768, + "norm_topk_prob": true, + "num_attention_heads": 32, + "num_experts": 128, + "num_experts_per_tok": 8, + "num_hidden_layers": 48, + "num_key_value_heads": 4, + "output_router_logits": false, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/vidur-alibabacloud/data/hf_configs/qwen3-8B_config.json b/vidur-alibabacloud/data/hf_configs/qwen3-8B_config.json new file mode 100644 index 00000000..79c4558e --- /dev/null +++ b/vidur-alibabacloud/data/hf_configs/qwen3-8B_config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12288, + "max_position_embeddings": 40960, + "max_window_layers": 36, + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.0", + "use_cache": true, + 
"use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/vidur-alibabacloud/data/hf_configs/qwen3-next-80B-A3B_Instruct_FP8_config.json b/vidur-alibabacloud/data/hf_configs/qwen3-next-80B-A3B_Instruct_FP8_config.json new file mode 100644 index 00000000..b664bd1e --- /dev/null +++ b/vidur-alibabacloud/data/hf_configs/qwen3-next-80B-A3B_Instruct_FP8_config.json @@ -0,0 +1,43 @@ +{ + "architectures": [ + "Qwen3NextForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "decoder_sparse_step": 1, + "eos_token_id": 151645, + "full_attention_interval": 4, + "head_dim": 256, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 5120, + "linear_conv_kernel_dim": 4, + "linear_key_head_dim": 128, + "linear_num_key_heads": 16, + "linear_num_value_heads": 32, + "linear_value_head_dim": 128, + "max_position_embeddings": 262144, + "mlp_only_layers": [], + "model_type": "qwen3_next", + "moe_intermediate_size": 512, + "norm_topk_prob": true, + "num_attention_heads": 16, + "num_experts": 512, + "num_experts_per_tok": 10, + "num_hidden_layers": 48, + "num_key_value_heads": 2, + "output_router_logits": false, + "partial_rotary_factor": 0.25, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000000, + "router_aux_loss_coef": 0.001, + "shared_expert_intermediate_size": 512, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.57.0.dev0", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/vidur-alibabacloud/data/hf_configs/qwen3-next-80B-A3B_config.json b/vidur-alibabacloud/data/hf_configs/qwen3-next-80B-A3B_config.json new file mode 100644 index 00000000..b664bd1e --- /dev/null +++ b/vidur-alibabacloud/data/hf_configs/qwen3-next-80B-A3B_config.json @@ -0,0 +1,43 @@ +{ + "architectures": [ + "Qwen3NextForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "decoder_sparse_step": 1, + "eos_token_id": 
151645, + "full_attention_interval": 4, + "head_dim": 256, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 5120, + "linear_conv_kernel_dim": 4, + "linear_key_head_dim": 128, + "linear_num_key_heads": 16, + "linear_num_value_heads": 32, + "linear_value_head_dim": 128, + "max_position_embeddings": 262144, + "mlp_only_layers": [], + "model_type": "qwen3_next", + "moe_intermediate_size": 512, + "norm_topk_prob": true, + "num_attention_heads": 16, + "num_experts": 512, + "num_experts_per_tok": 10, + "num_hidden_layers": 48, + "num_key_value_heads": 2, + "output_router_logits": false, + "partial_rotary_factor": 0.25, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000000, + "router_aux_loss_coef": 0.001, + "shared_expert_intermediate_size": 512, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.57.0.dev0", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/vidur-alibabacloud/examples/vidur-ali-scenarios/run_scenarios.sh b/vidur-alibabacloud/examples/vidur-ali-scenarios/run_scenarios.sh new file mode 100644 index 00000000..82d00dc8 --- /dev/null +++ b/vidur-alibabacloud/examples/vidur-ali-scenarios/run_scenarios.sh @@ -0,0 +1,361 @@ +#!/usr/bin/env bash +# ============================================================================= +# run_scenarios.sh — SimAI / AICB Vidur 四场景一键运行脚本 +# +# 所有文件统一汇聚于 examples/vidur-ali-scenarios/ 目录: +# examples/vidur-ali-scenarios/ +# ├── run_scenarios.sh ← 本脚本 +# ├── logs/ ← tee 运行日志 +# │ └── scenario__.log +# └── simulator_output/ ← vidur 模拟输出 (通过 --output_dir 覆盖) +# └── / +# +# 用法: +# bash examples/vidur-ali-scenarios/run_scenarios.sh --scenario <1|2|3|4> +# bash examples/vidur-ali-scenarios/run_scenarios.sh --all +# bash examples/vidur-ali-scenarios/run_scenarios.sh -h | --help +# +# 场景说明: +# 1 Qwen3-Next-80B 无PD分离 ws=32 (dp=32, tp=1, pp=1, ep=32) 调度: lor +# 2 Qwen3-Next-80B PD分离 ws=8 
(P=2, D=6, tp=1, pp=1) 调度: split_wise +# 3 DeepSeek-671B PD分离 ws=8 (P=2, D=6, tp=8, pp=1, ep=8) 调度: split_wise +# 4 Qwen3-MoE-235B PD分离 ws=8 (P=2, D=6, tp=4, pp=1, ep=4) 调度: split_wise +# +# 环境要求: +# conda activate vidur +# conda 路径: /root/miniconda3/envs/vidur +# python: /root/miniconda3/envs/vidur/bin/python +# ============================================================================= + +set -euo pipefail + +# ===================== 路径设置 ===================== +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VIDUR_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +LOG_DIR="$SCRIPT_DIR/logs" +OUTPUT_DIR="$SCRIPT_DIR/simulator_output" + +mkdir -p "$LOG_DIR" +mkdir -p "$OUTPUT_DIR" + +# ===================== 工具函数 ===================== + +cleanup() { + local exit_code=$? + if [[ $exit_code -ne 0 ]]; then + echo "" + echo "[WARN] Script exited abnormally (脚本异常退出), exit_code=$exit_code" + echo " Log dir (日志目录): $LOG_DIR" + echo " Output dir (输出目录): $OUTPUT_DIR" + fi +} +trap 'cleanup' EXIT INT TERM + +validate_environment() { + local conda_env="${CONDA_DEFAULT_ENV:-}" + if [[ "$conda_env" != "vidur" ]]; then + echo "[ERROR] vidur conda env not detected (未检测到 vidur conda 环境)" + echo " Current env (当前环境): ${conda_env:-N/A}" + echo " Please run (请先执行): conda activate vidur" + echo " conda path (路径): /root/miniconda3/envs/vidur" + exit 1 + fi + local python_bin + python_bin="$(which python 2>/dev/null || true)" + if [[ "$python_bin" != */miniconda3/envs/vidur/* ]]; then + echo "[ERROR] python not in vidur env (python 路径不在 vidur 环境内)" + echo " Current python (当前 python): ${python_bin:-not found}" + echo " Expected path (期望路径): /root/miniconda3/envs/vidur/bin/python" + exit 1 + fi + echo "[INFO] Env check passed (环境检查通过): conda=$conda_env, python=$python_bin" +} + +check_disk_space() { + local required_gb=10 + local available + available=$(df "$SCRIPT_DIR" | awk 'NR==2 {print int($4/1024/1024)}') + if [[ "$available" -lt "$required_gb" ]]; then + echo "[ERROR] 
Insufficient disk space (磁盘空间不足): need ${required_gb}GB, available ${available}GB" + exit 1 + fi + echo "[INFO] Disk check passed (磁盘空间检查通过): available ${available}GB, need ${required_gb}GB" +} + +progress_bar() { + local current=$1 total=$2 + local percent=$((current * 100 / total)) + local filled=$((percent / 5)) + local bar + bar=$(printf "%${filled}s" | tr ' ' '=') + local empty=$((20 - filled)) + local space + space=$(printf "%${empty}s") + printf "\n[%-20s] %d%% (%d/%d)\n" "${bar}${space}" "$percent" "$current" "$total" +} + +validate_scenario_output() { + local scenario_num=$1 + local output_dir=$2 + # Find the latest timestamped output directory + # Use || true to prevent SIGPIPE when ls outputs multiple entries under set -eo pipefail + local latest_dir + latest_dir=$(ls -td "$output_dir"/*/ 2>/dev/null | head -1) || true + if [[ -z "$latest_dir" ]]; then + echo "[WARN] Scenario $scenario_num: no output directory found in $output_dir" + return 1 + fi + if [[ -f "$latest_dir/chrome_trace.json" ]]; then + echo "[INFO] Scenario $scenario_num: validated (chrome_trace.json found)" + else + echo "[WARN] Scenario $scenario_num: chrome_trace.json NOT found in $latest_dir" + return 1 + fi +} + +# ===================== 公共参数 ===================== +# 四个场景共用的硬件/请求生成/后端参数 +COMMON_ARGS=( + # 硬件 + --replica_config_pd_p2p_comm_bandwidth 800 + --replica_config_nvlink_bandwidth 1600 + --replica_config_rdma_bandwidth 800 + --replica_config_pd_p2p_comm_dtype fp8 + --replica_config_network_device h20_dgx + --replica_config_device h20 + # 请求生成: Poisson QPS=100, 固定长度 prefill=100 / decode=8 + --request_generator_config_type synthetic + --interval_generator_config_type poisson + --poisson_request_interval_generator_config_qps 100 + --synthetic_request_generator_config_num_requests 4 + --length_generator_config_type fixed + --fixed_request_length_generator_config_prefill_tokens 100 + --fixed_request_length_generator_config_decode_tokens 8 + 
--trace_request_length_generator_config_trace_file \ + ./data/processed_traces/splitwise_conv.csv + # 后端 + --random_forrest_execution_time_predictor_config_backend aicb + # 输出目录 → examples/vidur-ali-scenarios/simulator_output/ + --metrics_config_output_dir "$OUTPUT_DIR" +) + +# ===================== 场景函数 ===================== + +# ----------------------------------------------------------------------- +# 场景 1: Qwen3-Next-80B 无PD分离 +# cluster_config_num_replicas = 32 (即 dp=32) +# ws = tp(1) × pp(1) × dp(32) = 32,ep = ws = 32(自动) +# 调度: global=lor, replica=sarathi +# ----------------------------------------------------------------------- +run_scenario_1() { + local ts + ts="$(date +%Y%m%d_%H%M%S)" + local log_file="$LOG_DIR/scenario_1_${ts}.log" + echo "[INFO] === Scenario 1: Qwen3-Next-80B, no PD, ws=32, lor (场景1: 无PD, ws=32, lor) ===" + echo "[INFO] Log (日志): $log_file" + cd "$VIDUR_ROOT" + set +o pipefail + python -m vidur.main \ + "${COMMON_ARGS[@]}" \ + --cluster_config_num_replicas 32 \ + --replica_config_pd_node_ratio 1 \ + --global_scheduler_config_type lor \ + --replica_scheduler_config_type sarathi \ + --replica_config_model_name qwen3-next-80B \ + --replica_config_tensor_parallel_size 1 \ + --replica_config_num_pipeline_stages 1 \ + 2>&1 | tee "$log_file" + local exit_code=${PIPESTATUS[0]} + set -o pipefail + if [[ $exit_code -ne 0 ]]; then + echo "[ERROR] Scenario 1 failed (exit_code=$exit_code), see: $log_file" + return $exit_code + fi + validate_scenario_output 1 "$OUTPUT_DIR" + echo "[INFO] Scenario 1 done (场景1 完成)" +} + +# ----------------------------------------------------------------------- +# 场景 2: Qwen3-Next-80B PD分离 +# 总 replica=8; num_prefill_replicas=2 → prefill dp=2, decode dp=6 +# prefill: ws = tp(1) × pp(1) × dp(2) = 2,ep = 2 +# decode: ws = tp(1) × pp(1) × dp(6) = 6,ep = 6 +# 调度: global=split_wise, replica=split_wise +# ----------------------------------------------------------------------- +run_scenario_2() { + local ts + ts="$(date 
+%Y%m%d_%H%M%S)" + local log_file="$LOG_DIR/scenario_2_${ts}.log" + echo "[INFO] === Scenario 2: Qwen3-Next-80B, PD, P=2 D=6, split_wise (场景2: PD分离, P=2 D=6) ===" + echo "[INFO] Log (日志): $log_file" + cd "$VIDUR_ROOT" + set +o pipefail + python -m vidur.main \ + "${COMMON_ARGS[@]}" \ + --cluster_config_num_replicas 8 \ + --replica_config_pd_node_ratio 0.25 \ + --replica_config_num_prefill_replicas 2 \ + --global_scheduler_config_type split_wise \ + --replica_scheduler_config_type split_wise \ + --replica_config_model_name qwen3-next-80B \ + --replica_config_tensor_parallel_size 1 \ + --replica_config_num_pipeline_stages 1 \ + --replica_config_prefill_tensor_parallel_size 1 \ + --replica_config_prefill_num_pipeline_stages 1 \ + --replica_config_decode_tensor_parallel_size 1 \ + --replica_config_decode_num_pipeline_stages 1 \ + 2>&1 | tee "$log_file" + local exit_code=${PIPESTATUS[0]} + set -o pipefail + if [[ $exit_code -ne 0 ]]; then + echo "[ERROR] Scenario 2 failed (exit_code=$exit_code), see: $log_file" + return $exit_code + fi + validate_scenario_output 2 "$OUTPUT_DIR" + echo "[INFO] Scenario 2 done (场景2 完成)" +} + +# ----------------------------------------------------------------------- +# 场景 3: DeepSeek-671B PD分离 +# 总 replica=8; pd_node_ratio=0.25 → prefill dp=2, decode dp=6 +# ws = tp(8) × pp(1) × dp = 16(P)/48(D),ep = 8 +# 调度: global=split_wise, replica=split_wise +# ----------------------------------------------------------------------- +run_scenario_3() { + local ts + ts="$(date +%Y%m%d_%H%M%S)" + local log_file="$LOG_DIR/scenario_3_${ts}.log" + echo "[INFO] === Scenario 3: DeepSeek-671B, PD, tp=8, ep=8, split_wise (场景3: PD分离, tp=8, ep=8) ===" + echo "[INFO] Log (日志): $log_file" + cd "$VIDUR_ROOT" + set +o pipefail + python -m vidur.main \ + "${COMMON_ARGS[@]}" \ + --cluster_config_num_replicas 8 \ + --replica_config_pd_node_ratio 0.25 \ + --global_scheduler_config_type split_wise \ + --replica_scheduler_config_type split_wise \ + 
--replica_config_model_name deepseek-671B \ + --replica_config_tensor_parallel_size 8 \ + --replica_config_num_pipeline_stages 1 \ + --replica_config_expert_model_parallel_size 8 \ + 2>&1 | tee "$log_file" + local exit_code=${PIPESTATUS[0]} + set -o pipefail + if [[ $exit_code -ne 0 ]]; then + echo "[ERROR] Scenario 3 failed (exit_code=$exit_code), see: $log_file" + return $exit_code + fi + validate_scenario_output 3 "$OUTPUT_DIR" + echo "[INFO] Scenario 3 done (场景3 完成)" +} + +# ----------------------------------------------------------------------- +# 场景 4: Qwen3-MoE-235B PD分离 +# 总 replica=8; pd_node_ratio=0.25 → prefill dp=2, decode dp=6 +# ws = tp(4) × pp(1) × dp = 8(P)/24(D),ep = 4 +# 调度: global=split_wise, replica=split_wise +# ----------------------------------------------------------------------- +run_scenario_4() { + local ts + ts="$(date +%Y%m%d_%H%M%S)" + local log_file="$LOG_DIR/scenario_4_${ts}.log" + echo "[INFO] === Scenario 4: Qwen3-MoE-235B, PD, tp=4, ep=4, split_wise (场景4: PD分离, tp=4, ep=4) ===" + echo "[INFO] Log (日志): $log_file" + cd "$VIDUR_ROOT" + set +o pipefail + python -m vidur.main \ + "${COMMON_ARGS[@]}" \ + --cluster_config_num_replicas 8 \ + --replica_config_pd_node_ratio 0.25 \ + --global_scheduler_config_type split_wise \ + --replica_scheduler_config_type split_wise \ + --replica_config_model_name qwen3-moe-235B \ + --replica_config_tensor_parallel_size 4 \ + --replica_config_num_pipeline_stages 1 \ + --replica_config_expert_model_parallel_size 4 \ + 2>&1 | tee "$log_file" + local exit_code=${PIPESTATUS[0]} + set -o pipefail + if [[ $exit_code -ne 0 ]]; then + echo "[ERROR] Scenario 4 failed (exit_code=$exit_code), see: $log_file" + return $exit_code + fi + validate_scenario_output 4 "$OUTPUT_DIR" + echo "[INFO] Scenario 4 done (场景4 完成)" +} + +# ===================== 帮助信息 ===================== + +print_help() { + cat <<'EOF' +Usage (用法): + bash examples/vidur-ali-scenarios/run_scenarios.sh --scenario Run single scenario (运行单个场景, N=1~4) 
# ===================== Entry point =====================

# Dispatches on the first CLI argument:
#   -h | --help | (none)  print usage and exit 0, with no environment checks
#   --scenario N          run scenario N (1-4)
#   --all                 run scenarios 1..4 in order, with a progress bar
# Anything else prints an error plus the usage text and exits 1.
main() {
    local mode="${1:-}"

    # Help needs no environment validation, so it is served first.
    if [[ -z "$mode" || "$mode" == "-h" || "$mode" == "--help" ]]; then
        print_help
        exit 0
    fi

    echo "============================================================"
    echo " SimAI / AICB Vidur 4-Scenario Runner (四场景运行脚本)"
    echo " Root dir (根目录): $SCRIPT_DIR"
    echo "============================================================"

    validate_environment
    check_disk_space

    if [[ "$mode" == "--scenario" ]]; then
        case "${2:-}" in
            1|2|3|4)
                "run_scenario_${2}"
                ;;
            *)
                echo "[ERROR] Invalid scenario (无效场景编号): ${2:-}, use 1~4"
                exit 1
                ;;
        esac
    elif [[ "$mode" == "--all" ]]; then
        local total=4
        local n
        for n in 1 2 3 4; do
            "run_scenario_${n}"
            progress_bar "$n" "$total"
        done

        echo ""
        echo "[INFO] All 4 scenarios completed (全部 4 个场景运行完毕)!"
        echo " Logs (日志): $LOG_DIR/"
        echo " Output (输出): $OUTPUT_DIR/"
    else
        echo "[ERROR] Unknown argument (未知参数): $1"
        print_help
        exit 1
    fi
}

main "$@"
For example, 0.5 means half of the replicas are prefill nodes and half are decode nodes (P:D = 1:1)."}, + ) + + # ============================================================ + # [EP Auto] Auto-computed EP, user doesn't need to specify; user value will be overridden + # Temporary = tp * pp (per-replica), later overridden in cluster.py + # to tp * pp * dp (full cluster world_size) + # [EP Auto] EP 自动计算, 用户无需手动传, 传了也会被覆盖 + # 临时值 = tp * pp (per-replica), 后续在 cluster.py 中 + # 覆盖为 tp * pp * dp (全集群 world_size) + # ============================================================ expert_model_parallel_size: int = field( default=1, - metadata={"help": "Degree of expert model parallelism."}, + metadata={"help": "Degree of expert model parallelism. " + "Auto-computed as cluster world_size (tp*pp*dp) in cluster.py, " + "user-provided value will be overridden."}, + ) + + # ============================================================ + # PD separation specific params (optional, fallback to shared values above if not set) + # Reference vLLM: PD separation runs prefill/decode as completely independent clusters + # with potentially different TP/PP/EP + # PD 分离专用参数 (可选, 未指定时 fallback 到上方共享值) + # 参考 vLLM: PD 分离时 prefill/decode 是完全独立的集群 + # 可以有不同 TP/PP/EP + # ============================================================ + prefill_tensor_parallel_size: Optional[int] = field( + default=None, + metadata={"help": "Prefill-specific TP size. Falls back to tensor_parallel_size if not set."}, + ) + prefill_num_pipeline_stages: Optional[int] = field( + default=None, + metadata={"help": "Prefill-specific PP size. Falls back to num_pipeline_stages if not set."}, ) + decode_tensor_parallel_size: Optional[int] = field( + default=None, + metadata={"help": "Decode-specific TP size. Falls back to tensor_parallel_size if not set."}, + ) + decode_num_pipeline_stages: Optional[int] = field( + default=None, + metadata={"help": "Decode-specific PP size. 
Falls back to num_pipeline_stages if not set."}, + ) + # Directly specify prefill replica count, takes priority over pd_node_ratio + # More flexible: avoids pd_node_ratio indivisibility issues + # 直接指定 prefill replica 数量, 优先于 pd_node_ratio 计算 + # 更灵活: 避免 pd_node_ratio 不整除的问题 + num_prefill_replicas: Optional[int] = field( + default=None, + metadata={"help": "Directly specify number of prefill replicas. " + "Takes priority over pd_node_ratio when set. " + "num_decode_replicas = total_replicas - num_prefill_replicas."}, + ) + def __post_init__(self): + # Base world_size: per-replica GPU count (excluding dp) + # 基础 world_size: per-replica 的 GPU 数 (不含 dp) self.world_size = self.num_pipeline_stages * self.tensor_parallel_size + + # ============================================================ + # [EP] Temporary = tp * pp (per-replica), excluding dp + # Will be overridden in cluster.py to tp * pp * dp (full cluster world_size) + # Initialization only, printed value is for reference + # [EP] 临时值 = tp * pp (per-replica), 不含 dp + # 后续在 cluster.py 中会被覆盖为 tp * pp * dp (全集群 world_size) + # 这里只是初始化, 打印仅供参考 + # ============================================================ + user_ep = self.expert_model_parallel_size + self.expert_model_parallel_size = self.world_size # Temporary, overridden in cluster.py / 临时值, cluster.py 会覆盖 + if user_ep != 1 and user_ep != self.world_size: + logger.info(f"[EP] Note: user-provided expert_model_parallel_size={user_ep}, " + f"temporarily set to per-replica ws={self.world_size}, " + f"final value will be overridden in cluster.py as tp*pp*dp") + + # 打印 ReplicaConfig 配置摘要 | Print ReplicaConfig summary + logger.info(f"[ReplicaConfig] tp={self.tensor_parallel_size}, pp={self.num_pipeline_stages}, " + f"per_replica_ws={self.world_size}, ep(temp)={self.expert_model_parallel_size}, " + f"pd_ratio={self.pd_node_ratio}") + if self.pd_node_ratio < 1: + p_tp = self.prefill_tensor_parallel_size or self.tensor_parallel_size + p_pp = 
@dataclass
class H200DeviceSKUConfig(BaseDeviceSKUConfig):
    """Device SKU entry registered as ``DeviceSKUType.H200``.

    Decorated with ``@dataclass`` like the other SKU configs in this module
    (e.g. ``H20DeviceSKUConfig``, ``H800DeviceSKUConfig``) so that the
    defaults below are registered as dataclass fields instead of remaining
    plain class attributes.
    """

    fp16_tflops: int = 989
    fp8_tflops: int = 1979
    total_memory_gb: int = 141

    @staticmethod
    def get_type():
        """Return the enum member that identifies this SKU."""
        return DeviceSKUType.H200


# GB200 NVL72
@dataclass
class GB200DeviceSKUConfig(BaseDeviceSKUConfig):
    """Device SKU entry registered as ``DeviceSKUType.GB200``.

    Decorated with ``@dataclass`` for the same reason as
    ``H200DeviceSKUConfig``: without it the annotated defaults are not
    dataclass fields.
    """

    fp16_tflops: int = 2500
    fp8_tflops: int = 5000
    total_memory_gb: int = 192

    @staticmethod
    def get_type():
        """Return the enum member that identifies this SKU."""
        return DeviceSKUType.GB200
@dataclass
class Qwen3Next80BA3BModelConfig(BaseModelConfig):
    """Static hyper-parameters for the model registered as "qwen3-next-80B".

    The first group of fields presumably mirrors the upstream model's
    published configuration file (TODO confirm against the source it was
    copied from). The second group repeats the values under the field names
    the base config uses, as noted on each line.
    """

    # architectures: list = field(default_factory=lambda: ["Qwen3NextForCausalLM"])
    architectures: str = "Qwen3NextForCausalLM"
    attention_dropout: float = 0.0
    bos_token_id: int = 151643
    decoder_sparse_step: int = 1
    eos_token_id: int = 151645
    full_attention_interval: int = 4
    head_dim: int = 256
    hidden_act: str = "silu"
    hidden_size: int = 2048
    initializer_range: float = 0.02
    intermediate_size: int = 5120
    linear_conv_kernel_dim: int = 4
    linear_key_head_dim: int = 128
    linear_num_key_heads: int = 16
    linear_num_value_heads: int = 32
    linear_value_head_dim: int = 128
    max_position_embeddings: int = 262144
    mlp_only_layers: list = field(default_factory=list)
    model_type: str = "qwen3_next"
    moe_intermediate_size: int = 512
    norm_topk_prob: bool = True
    num_attention_heads: int = 16
    num_experts: int = 512
    num_experts_per_tok: int = 10
    num_hidden_layers: int = 48
    num_key_value_heads: int = 2
    output_router_logits: bool = False
    partial_rotary_factor: float = 0.25
    rms_norm_eps: float = 1e-06
    rope_scaling: Optional[Dict[str, Any]] = None
    rope_theta: float = 10000000
    router_aux_loss_coef: float = 0.001
    shared_expert_intermediate_size: int = 512
    tie_word_embeddings: bool = False
    torch_dtype: str = "bfloat16"
    transformers_version: str = "4.57.0.dev0"
    use_cache: bool = True
    use_sliding_window: bool = False
    vocab_size: int = 151936
    # Fields mapped onto the base-class parameter names
    num_layers: int = 48  # maps to num_hidden_layers
    num_q_heads: int = 16  # maps to num_attention_heads
    num_kv_heads: int = 2  # maps to num_key_value_heads
    embedding_dim: int = 2048  # maps to hidden_size
    mlp_hidden_dim: int = 5120  # maps to intermediate_size
    use_gated_mlp: bool = True  # set per model architecture
    use_bias: bool = False  # set per model architecture
    use_qkv_bias: bool = False  # set per model architecture
    activation: ActivationType = ActivationType.SILU  # maps to hidden_act
    norm: NormType = NormType.RMS_NORM  # set per model architecture
    post_attn_norm: bool = True  # set per model architecture

    @staticmethod
    def get_name():
        """Return the name under which this config is looked up."""
        return "qwen3-next-80B"
@dataclass
class H20DgxNodeSKUConfig(BaseNodeSKUConfig):
    """Node SKU entry registered as ``NodeSKUType.H20_DGX``.

    The node's device type defaults to ``DeviceSKUType.H20`` so that it
    matches the node type it is registered under. The previous default,
    ``DeviceSKUType.H800``, duplicated the H800 node config.
    """

    device_sku_type: DeviceSKUType = DeviceSKUType.H20
    num_devices_per_node: int = 8

    @staticmethod
    def get_type():
        """Return the enum member that identifies this node SKU."""
        return NodeSKUType.H20_DGX
b/vidur-alibabacloud/vidur/entities/batch.py @@ -52,7 +52,6 @@ def __init__( self._completed_at = None self._scheduled = False self._completed = False - # assert len(requests) <=1, f"> debug: 看看requests的长度是不是始终为1" @property def replica_id(self) -> int: diff --git a/vidur-alibabacloud/vidur/entities/cluster.py b/vidur-alibabacloud/vidur/entities/cluster.py index dba3fe98..52e6248f 100644 --- a/vidur-alibabacloud/vidur/entities/cluster.py +++ b/vidur-alibabacloud/vidur/entities/cluster.py @@ -10,12 +10,22 @@ # Cluster contains multiple Replicas class Cluster(BaseEntity): + def __init__( self, cluster_config: ClusterConfig, metrics_config: MetricsConfig, generator_config: BaseRequestGeneratorConfig, ) -> None: + """ + Initialize cluster with replicas based on PD disaggregation config. + 根据 PD 分离配置初始化集群及其 replica + + - pd_node_ratio == 1: MIXED mode, all replicas handle both prefill & decode + pd_node_ratio == 1: MIXED 模式,所有 replica 同时处理 prefill 和 decode + - 0 < pd_node_ratio < 1: PD separation, independent prefill/decode clusters + 0 < pd_node_ratio < 1: PD 分离,独立的 prefill/decode 集群 + """ # >: test when cluster is registered self._id = Cluster.generate_id() @@ -26,13 +36,130 @@ def __init__( # Init replica object handles self._replicas = {} + + rc = self._config.replica_config # shorthand + num_replicas = self._config.num_replicas - for _ in range(self._config.num_replicas): - replica = Replica(self._config.replica_config, generator_config) - self._replicas[replica.id] = replica + # ============================================================ + # PD disaggregation OFF (pd_node_ratio == 1): all replicas are MIXED type + # PD 分离关闭: 所有 replica 都是 MIXED 类型 + # Each replica handles both prefill and decode + # 每个 replica 同时处理 prefill 和 decode + # EP = ws = tp * pp * dp (full cluster world_size) + # ============================================================ + if rc.pd_node_ratio == 1: + dp = num_replicas + full_ws = rc.tensor_parallel_size * rc.num_pipeline_stages * dp + # 
[EP Auto] Final EP = full cluster world_size + # [EP Auto] 最终 EP = 全集群 world_size + rc.expert_model_parallel_size = full_ws + + # [Key] Set per-phase attributes in non-PD mode for uniform interface + # [关键] 非PD模式也设置 per-phase 属性, 与PD模式保持统一接口 + # In non-PD mode all replicas handle prefill/decode, sharing the same world_size + # 非PD时所有 replica 同时处理 prefill/decode, 共享同一个 world_size + rc.prefill_world_size = full_ws + rc.decode_world_size = full_ws + rc.prefill_ep = full_ws + rc.decode_ep = full_ws + rc._num_prefill_replicas = num_replicas # All replicas do prefill / 所有 replica 都做 prefill + rc._num_decode_replicas = num_replicas # All replicas do decode / 所有 replica 都做 decode + rc._prefill_tp = rc.tensor_parallel_size + rc._prefill_pp = rc.num_pipeline_stages + rc._decode_tp = rc.tensor_parallel_size + rc._decode_pp = rc.num_pipeline_stages + + logger.info(f"{'='*70}") + logger.info(f"[Cluster] PD off, MIXED mode (PD 分离关闭, pd_node_ratio=1, MIXED 模式)") + logger.info(f"[Cluster] tp={rc.tensor_parallel_size}, pp={rc.num_pipeline_stages}, " + f"dp={dp}, ws={full_ws}, ep={full_ws}") + logger.info(f"[Cluster] prefill_ws={rc.prefill_world_size}, decode_ws={rc.decode_world_size} (same, 相同)") + logger.info(f"{'='*70}") + + for _ in range(num_replicas): + replica = Replica(rc, generator_config) + self._replicas[replica.id] = replica + + # ============================================================ + # PD disaggregation ON (0 < pd_node_ratio < 1) + # PD 分离开启 + # Prefill/Decode are independent clusters, may have different TP/PP/EP + # Prefill/Decode 是独立集群, 可有不同 TP/PP/EP + # + # Replica count priority / replica 数量确定优先级: + # 1. num_prefill_replicas (user specified, most flexible) + # num_prefill_replicas (用户直接指定, 最灵活) + # 2. 
pd_node_ratio (calculated by ratio) + # pd_node_ratio (按比例计算) + # ============================================================ + elif rc.pd_node_ratio > 0 and rc.pd_node_ratio < 1: + # --- Replica count allocation --- + # --- replica 数量分配 --- + if rc.num_prefill_replicas is not None: + # User specified prefill replica count + # 用户直接指定 prefill replica 数量 + num_p = rc.num_prefill_replicas + num_d = num_replicas - num_p + replica_source = f"num_prefill_replicas={rc.num_prefill_replicas} (user specified, 用户指定)" + else: + # Calculate from pd_node_ratio / 通过 pd_node_ratio 计算 + num_p = int(num_replicas * rc.pd_node_ratio) + num_d = num_replicas - num_p + replica_source = f"pd_node_ratio={rc.pd_node_ratio} (by ratio, 按比例)" + + rc._num_prefill_replicas = num_p + rc._num_decode_replicas = num_d + assert num_p > 0 and num_d > 0, ( + f"[Cluster] _num_prefill_replicas={num_p} 和 " + f"_num_decode_replicas={num_d} 必须都 > 0, " + f"来源: {replica_source}") + + # --- per-phase TP/PP (fallback to shared values) --- + # --- per-phase TP/PP (回退到共享值) --- + p_tp = rc.prefill_tensor_parallel_size or rc.tensor_parallel_size + p_pp = rc.prefill_num_pipeline_stages or rc.num_pipeline_stages + d_tp = rc.decode_tensor_parallel_size or rc.tensor_parallel_size + d_pp = rc.decode_num_pipeline_stages or rc.num_pipeline_stages + + # --- per-phase world_size and EP --- + # --- per-phase world_size 和 EP --- + # EP = world_size = tp * pp * dp (ref vLLM: EP_SIZE = TP_SIZE x DP_SIZE) + # EP = world_size = tp * pp * dp (参考 vLLM) + rc.prefill_world_size = p_tp * p_pp * num_p + rc.decode_world_size = d_tp * d_pp * num_d + rc.prefill_ep = rc.prefill_world_size + rc.decode_ep = rc.decode_world_size + + # Save per-phase actual TP/PP for later use + # 保存 per-phase 的实际 TP/PP, 方便后续使用 + rc._prefill_tp = p_tp + rc._prefill_pp = p_pp + rc._decode_tp = d_tp + rc._decode_pp = d_pp + + assert rc.prefill_world_size > 0 and rc.decode_world_size > 0, ( + f"[Cluster] prefill_ws={rc.prefill_world_size} 和 " + 
f"decode_ws={rc.decode_world_size} 必须都 > 0") + + # --- Verbose PD config printout --- + # --- 详尽打印 PD 配置 --- + logger.info(f"{'='*70}") + logger.info(f"[PD Config] PD enabled (PD 分离开启) ({replica_source})") + logger.info(f"[PD Config] Total replicas (总 replica 数): {num_replicas} " + f"(prefill={num_p}, decode={num_d})") + logger.info(f"[PD Config] Prefill: tp={p_tp}, pp={p_pp}, dp={num_p}, " + f"ws={rc.prefill_world_size}, ep={rc.prefill_ep}") + logger.info(f"[PD Config] Decode: tp={d_tp}, pp={d_pp}, dp={num_d}, " + f"ws={rc.decode_world_size}, ep={rc.decode_ep}") + logger.info(f"{'='*70}") + + for _ in range(num_replicas): + replica = Replica(rc, generator_config) + self._replicas[replica.id] = replica if metrics_config.write_json_trace: self._write_cluster_info_to_file() + @property def replicas(self): diff --git a/vidur-alibabacloud/vidur/entities/execution_time.py b/vidur-alibabacloud/vidur/entities/execution_time.py index b8842ba7..a467660d 100644 --- a/vidur-alibabacloud/vidur/entities/execution_time.py +++ b/vidur-alibabacloud/vidur/entities/execution_time.py @@ -1,19 +1,419 @@ from vidur.entities.base_entity import BaseEntity - -# > add from vidur.config import ( BaseExecutionTimePredictorConfig, BaseReplicaSchedulerConfig, MetricsConfig, ReplicaConfig, ) +from vidur.logger import init_logger + import os import sys import subprocess +import json +import time as time_module from pathlib import Path import csv -from typing import Dict, Optional -# import Dictionary +from typing import Dict, Optional, Tuple + +logger = init_logger(__name__) + +# 获取当前文件目录,用于计算 aicb 的绝对路径 +# Get current file directory for calculating absolute path to aicb +_CURRENT_FILE_DIR = Path(__file__).resolve().parent +# execution_time.py is under vidur-alibabacloud/vidur/entities/ +# aicb is under workspace_root/aicb/ +# Path: entities/ -> vidur/ -> vidur-alibabacloud/ -> workspace_root/ -> aicb/ +_AICB_ROOT = _CURRENT_FILE_DIR.parent.parent.parent / "aicb" + + +# 
class AICBGlobalCache:
    """In-memory cache of parsed AICB data with an on-disk JSON mirror.

    Entries are keyed by ``(model_name, ws, tp, pp, ep, bs, seq, phase)`` and
    hold ``{layer_id: {component_name: {'comp_time': ..., 'comm_size': ...}}}``.

    Lookup order used by :meth:`get`:

    1. exact key match;
    2. linear interpolation between the two cached ``seq`` values that
       bracket the requested ``seq`` within the same
       ``(model, ws, tp, pp, ep, bs, phase)`` group;
    3. the single nearest cached ``seq`` when the request lies outside the
       cached range;
    4. ``None`` on a miss.

    :meth:`put` also writes each entry to ``_AICB_CACHE_DIR`` as JSON, and the
    constructor reloads every ``aicb-*.json`` file found there. All file I/O
    is best effort: failures are logged or ignored, never raised.
    """

    def __init__(self):
        """Create the cache/log directories, then load any persisted entries."""
        # (model, ws, tp, pp, ep, bs, seq, phase) -> parsed per-layer data
        self._cache: Dict[Tuple, Dict] = {}

        # Counters reported by print_stats().
        self._stats = {
            'cache_hits': 0,      # exact key matches served by get()
            'interpolations': 0,  # interpolated or nearest-neighbour answers
            'aicb_calls': 0,      # incremented by record_aicb_call()
            'csv_loads': 0,       # incremented by record_csv_load()
        }

        _AICB_CACHE_DIR.mkdir(parents=True, exist_ok=True)
        _AICB_LOG_DIR.mkdir(parents=True, exist_ok=True)

        self._log_file = _AICB_LOG_DIR / "aicb_cache_log.txt"

        # NOTE(review): this path is stored but never read or written by the
        # methods of this class; persisted entries are rediscovered by
        # globbing the cache directory instead.
        self._index_file = _AICB_WORKLOAD_DIR / "cache_index.json"
        self._load_index()

        self._log(f"AICBGlobalCache 初始化完成, 缓存目录: {_AICB_CACHE_DIR}")

    def _log(self, msg: str) -> None:
        """Append a timestamped line to the log file and emit it at DEBUG level."""
        timestamp = time_module.strftime("%Y-%m-%d %H:%M:%S")
        log_line = f"[{timestamp}] {msg}"
        try:
            # Explicit UTF-8: messages contain non-ASCII text, which a
            # locale-default encoding may be unable to encode.
            with open(self._log_file, 'a', encoding='utf-8') as f:
                f.write(log_line + '\n')
        except OSError:
            # File logging is best effort; the logger call below still runs.
            pass
        logger.debug(f"[AICB Cache] {msg}")

    def _make_key(self, model_name, ws, tp, pp, ep, bs, seq, phase) -> Tuple:
        """Return the full cache key for one configuration."""
        return (model_name, ws, tp, pp, ep, bs, seq, phase)

    def _make_group_key(self, model_name, ws, tp, pp, ep, bs, phase) -> Tuple:
        """Return the key without ``seq``; entries sharing it can be interpolated."""
        return (model_name, ws, tp, pp, ep, bs, phase)

    def get(self, model_name, ws, tp, pp, ep, bs, seq, phase) -> Optional[Dict]:
        """Return cached data for the configuration, or ``None`` on a miss.

        An exact match is returned directly. Otherwise the result is derived
        from cached entries of the same group that differ only in ``seq``
        (see :meth:`_find_neighbors`) and stored under the requested key.

        Returns:
            The per-layer data dict, or ``None`` when nothing usable is cached.
        """
        key = self._make_key(model_name, ws, tp, pp, ep, bs, seq, phase)

        # 1. Exact match.
        if key in self._cache:
            self._stats['cache_hits'] += 1
            self._log(f"[命中] 精确匹配: key={self._format_key(key)}, "
                      f"总命中={self._stats['cache_hits']}")
            return self._cache[key]

        # 2. Interpolation or nearest neighbour within the same group.
        group_key = self._make_group_key(model_name, ws, tp, pp, ep, bs, phase)
        derived = self._find_neighbors(group_key, seq)

        if derived is not None:
            self._stats['interpolations'] += 1
            # Stored so the same request becomes an exact hit next time.
            # NOTE(review): derived entries live in the same dict that
            # _find_neighbors scans, so they also act as anchors for later
            # interpolations, and a nearest-neighbour result is the same
            # dict object as its source entry.
            self._cache[key] = derived
            self._log(f"[插值] seq={seq}, 使用邻居插值, "
                      f"总插值={self._stats['interpolations']}")
            return derived

        # 3. Miss.
        self._log(f"[未命中] key={self._format_key(key)}")
        return None

    def put(self, model_name, ws, tp, pp, ep, bs, seq, phase, data: Dict) -> None:
        """Store ``data`` under the configuration key and mirror it to disk."""
        key = self._make_key(model_name, ws, tp, pp, ep, bs, seq, phase)
        self._cache[key] = data
        self._log(f"[缓存] key={self._format_key(key)}, layers={len(data)}")
        self._save_cache_entry(key, data)

    def record_aicb_call(self) -> None:
        """Increment and log the ``aicb_calls`` counter."""
        self._stats['aicb_calls'] += 1
        self._log(f"[AICB调用] 第{self._stats['aicb_calls']}次subprocess调用")

    def record_csv_load(self) -> None:
        """Increment the ``csv_loads`` counter."""
        self._stats['csv_loads'] += 1

    def _find_neighbors(self, group_key, target_seq) -> Optional[Dict]:
        """Derive data for ``target_seq`` from cached entries of ``group_key``.

        Returns:
            * the cached entry itself when ``target_seq`` is cached;
            * a linear interpolation when cached ``seq`` values exist on both
              sides of ``target_seq``;
            * the entry of the closest side when only one side has entries;
            * ``None`` when the group has no cached entries.
        """
        # key layout: (model, ws, tp, pp, ep, bs, seq, phase)
        cached_seqs = {
            key[6]: data
            for key, data in self._cache.items()
            if key[:6] + (key[7],) == group_key
        }
        if not cached_seqs:
            return None

        # Not expected when called from get(), which checks exact keys first.
        if target_seq in cached_seqs:
            return cached_seqs[target_seq]

        # Closest cached seq on each side of the target, if any.
        lower_seq = max((s for s in cached_seqs if s < target_seq), default=None)
        upper_seq = min((s for s in cached_seqs if s > target_seq), default=None)

        if lower_seq is not None and upper_seq is not None:
            alpha = (target_seq - lower_seq) / (upper_seq - lower_seq)
            interpolated = self._interpolate(
                cached_seqs[lower_seq], cached_seqs[upper_seq], alpha)
            self._log(f" 插值: seq={target_seq}, lower={lower_seq}, upper={upper_seq}, "
                      f"alpha={alpha:.4f}")
            return interpolated

        # Exactly one side has data: reuse it unchanged.
        nearest = lower_seq if lower_seq is not None else upper_seq
        self._log(f" 最近邻: seq={target_seq}, nearest={nearest}")
        return cached_seqs[nearest]

    def _interpolate(self, data_low: Dict, data_high: Dict, alpha: float) -> Dict:
        """Blend two datasets as ``low * (1 - alpha) + high * alpha``.

        The result covers the union of layer ids and of component names; a
        layer or component missing from one side contributes zeros.

        Args:
            data_low: data cached at the lower ``seq``.
            data_high: data cached at the higher ``seq``.
            alpha: weight of ``data_high``.
        """
        zero = {'comp_time': 0.0, 'comm_size': 0.0}
        result = {}

        for layer_id in set(data_low.keys()) | set(data_high.keys()):
            low_layer = data_low.get(layer_id, {})
            high_layer = data_high.get(layer_id, {})
            result[layer_id] = {}

            for comp_name in set(low_layer.keys()) | set(high_layer.keys()):
                low_comp = low_layer.get(comp_name, zero)
                high_comp = high_layer.get(comp_name, zero)
                result[layer_id][comp_name] = {
                    'comp_time': low_comp['comp_time'] * (1 - alpha) + high_comp['comp_time'] * alpha,
                    'comm_size': low_comp['comm_size'] * (1 - alpha) + high_comp['comm_size'] * alpha,
                }

        return result

    def _format_key(self, key: Tuple) -> str:
        """Render a cache key as a ``name=value`` list for log output."""
        return (f"model={key[0]}, ws={key[1]}, tp={key[2]}, pp={key[3]}, "
                f"ep={key[4]}, bs={key[5]}, seq={key[6]}, phase={key[7]}")

    def _save_cache_entry(self, key: Tuple, data: Dict) -> None:
        """Write one entry to the cache directory as JSON (best effort)."""
        # The filename encodes the key; _parse_filename() reverses it.
        filename = (f"aicb-{key[0]}-ws{key[1]}-tp{key[2]}-pp{key[3]}"
                    f"-ep{key[4]}-bs{key[5]}-seq{key[6]}-{key[7]}.json")
        filepath = _AICB_CACHE_DIR / filename

        # JSON object keys must be strings; layer ids are converted back to
        # int on load.
        serializable = {str(lid): ldata for lid, ldata in data.items()}

        try:
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(serializable, f, indent=2)
        except (OSError, TypeError, ValueError) as e:
            self._log(f"[WARNING] 保存缓存条目失败: {e}")

    def _load_index(self) -> None:
        """Populate the cache from every ``aicb-*.json`` file in the cache dir.

        Files whose name cannot be parsed are ignored. Files that cannot be
        read or decoded are skipped with a log line instead of aborting.
        """
        try:
            if not _AICB_CACHE_DIR.exists():
                return
            json_files = list(_AICB_CACHE_DIR.glob("aicb-*.json"))
        except OSError:
            return

        loaded = 0
        for jf in json_files:
            key = self._parse_filename(jf.name)
            if key is None:
                continue
            try:
                with open(jf, 'r', encoding='utf-8') as f:
                    raw_data = json.load(f)
                # Restore the int layer ids that were stringified for JSON.
                data = {int(lid_str): ldata for lid_str, ldata in raw_data.items()}
            except (OSError, ValueError, AttributeError, TypeError) as e:
                # Unreadable file, invalid JSON, non-object top level or a
                # non-integer layer id: skip this file only.
                self._log(f"[WARNING] skipped cache file {jf.name}: {e}")
                continue
            self._cache[key] = data
            loaded += 1

        if loaded > 0:
            self._log(f"从磁盘加载了 {loaded} 条缓存记录")

    def _parse_filename(self, filename: str) -> Optional[Tuple]:
        """Recover a cache key from a filename written by ``_save_cache_entry``.

        Expected shape:
        ``aicb-<model>-ws<N>-tp<N>-pp<N>-ep<N>-bs<N>-seq<N>-<phase>.json``.
        The model name may itself contain ``-``; the seven trailing fields are
        split off from the right.

        Returns:
            The key tuple, or ``None`` when the name does not match.
        """
        if not filename.startswith("aicb-") or not filename.endswith(".json"):
            return None

        name = filename[5:-5]  # strip "aicb-" and ".json"
        parts = name.rsplit('-', 7)
        if len(parts) < 8:
            return None

        model_name, ws, tp, pp, ep, bs, seq, phase = parts
        try:
            return (
                model_name,
                int(ws.replace('ws', '')),
                int(tp.replace('tp', '')),
                int(pp.replace('pp', '')),
                int(ep.replace('ep', '')),
                int(bs.replace('bs', '')),
                int(seq.replace('seq', '')),
                phase,
            )
        except ValueError:
            # A numeric field did not contain an integer.
            return None

    def print_stats(self) -> None:
        """Log the counters and the derived hit rate, and append a summary line."""
        total_queries = (self._stats['cache_hits'] + self._stats['interpolations']
                         + self._stats['aicb_calls'])
        logger.info(f"\n{'='*70}")
        logger.info(f"[AICB Cache Stats Report (统计报告)]")
        logger.info(f"{'='*70}")
        logger.info(f" Cache entries (缓存条目数): {len(self._cache)}")
        logger.info(f" Exact hits (精确命中, 查表): {self._stats['cache_hits']}")
        logger.info(f" Interpolated hits (插值命中): {self._stats['interpolations']}")
        logger.info(f" AICB real calls (AICB实际调用): {self._stats['aicb_calls']}")
        logger.info(f" CSV file loads (CSV文件加载): {self._stats['csv_loads']}")
        if total_queries > 0:
            hit_rate = (self._stats['cache_hits'] + self._stats['interpolations']) / total_queries * 100
            logger.info(f" Cache hit rate (缓存命中率): {hit_rate:.1f}%")
        logger.info(f" Cache dir (缓存目录): {_AICB_CACHE_DIR}")
        logger.info(f" Log file (日志文件): {self._log_file}")
        logger.info(f"{'='*70}\n")

        self._log(f"统计: hits={self._stats['cache_hits']}, "
                  f"interp={self._stats['interpolations']}, "
                  f"calls={self._stats['aicb_calls']}, "
                  f"entries={len(self._cache)}")

    def save_lookup_table(self) -> None:
        """Write a summary of all cached keys to ``lookup_table.json`` (best effort)."""
        try:
            table = {
                self._format_key(key): {
                    'num_layers': len(data),
                    'layer_ids': sorted(int(k) for k in data),
                }
                for key, data in self._cache.items()
            }

            table_file = _AICB_WORKLOAD_DIR / "lookup_table.json"
            # ensure_ascii=False can emit non-ASCII text, so the encoding is
            # fixed rather than left to the locale.
            with open(table_file, 'w', encoding='utf-8') as f:
                json.dump(table, f, indent=2, ensure_ascii=False)
            self._log(f"查表索引已保存到 {table_file}")
        except (OSError, TypeError, ValueError) as e:
            self._log(f"[WARNING] 保存查表索引失败: {e}")
self._mlp_layer_down_proj_execution_time \ + self._mlp_layer_act_execution_time \ + self._tensor_parallel_communication_time \ - + self._mlp_norm_time > 0, f"> debug" + + self._mlp_norm_time > 0, "MLP layer execution time must be positive" return ( self._mlp_layer_up_proj_execution_time + self._mlp_layer_down_proj_execution_time @@ -108,7 +508,7 @@ def _get_attention_layer_execution_time(self) -> float: + self._attention_decode_execution_time \ + self._attention_prefill_execution_time \ + self._tensor_parallel_communication_time \ - + self._attn_norm_time > 0, f"> debug" + + self._attn_norm_time > 0, "Attention layer execution time must be positive" return ( self._attention_layer_pre_proj_execution_time + self._attention_layer_post_proj_execution_time @@ -124,72 +524,95 @@ def _get_attention_layer_execution_time_from_aicb(self,layer_id) -> float: if self._aicb_data is None: self._aicb_data = self._load_aicb_data() - layer_data = self._aicb_data.get(layer_id, {}).get("attention", {}) - # 单位从ns转换为s - # Convert unit from ns to s + # If AICB data is empty, return a small default to avoid division by zero + # 如果 AICB 数据为空,返回一个小的默认值避免除零 + if not self._aicb_data: + logger.warning("AICB data is empty, using default attention execution time") + return 1e-6 # 1 microsecond as default + + layer_data = self._aicb_data.get(layer_id, {}).get("attention", {}) + + # Convert unit from ns to s / 单位从ns转换为s attention_comp_time = layer_data.get('comp_time', 0.0) * 1e-9 - # 单位Byte - # Unit: Byte + # Unit: Byte / 单位Byte attention_comm_size = layer_data.get('comm_size', 0.0) - attention_time = attention_comp_time + 0 # TODO attention_comm_time - return attention_time + attention_time = attention_comp_time + 0 # TODO(tianhao909): add attention_comm_time + return attention_time if attention_time > 0 else 1e-6 # def _get_mlp_layer_execution_time_from_dpsk_and_aiob(self) -> float: # def _get_mlp_layer_execution_time_from_aicb(self) -> float: def _get_mlp_layer_execution_time_from_aicb(self, 
layer_id) -> float: if self._aicb_data is None: self._aicb_data = self._load_aicb_data() - layer_data = self._aicb_data.get(layer_id, {}).get("mlp", {}) - # 单位从ns转换为s - # Convert unit from ns to s + # If AICB data is empty, return a small default to avoid division by zero + # 如果 AICB 数据为空,返回一个小的默认值避免除零 + if not self._aicb_data: + logger.warning("AICB data is empty, using default MLP execution time") + return 1e-6 # 1 microsecond as default + + layer_data = self._aicb_data.get(layer_id, {}).get("mlp", {}) + + # Convert unit from ns to s / 单位从ns转换为s mlp_comp_time = layer_data.get('comp_time', 0.0) * 1e-9 - # 单位Byte - # Unit: Byte + # Unit: Byte / 单位Byte mlp_comm_size = layer_data.get('comm_size', 0.0) - mlp_time = mlp_comp_time + 0 # TODO mlp_comm_time - return mlp_time + mlp_time = mlp_comp_time + 0 # TODO(tianhao909): add mlp_comm_time + return mlp_time if mlp_time > 0 else 1e-6 def _get_moe_layer_execution_time_from_aicb(self, layer_id) -> float: if self._aicb_data is None: self._aicb_data = self._load_aicb_data() # return self._aicb_data.get("moe") - # 从数据结构中获取对应的值 + # If AICB data is empty, return a small default to avoid division by zero + # 如果 AICB 数据为空,返回一个小的默认值避免除零 + if not self._aicb_data: + logger.warning("AICB data is empty, using default MoE execution time") + return 1e-6 # 1 microsecond as default + # Get corresponding values from the data structure + # 从数据结构中获取对应的值 layer_data = self._aicb_data.get(layer_id, {}).get("moe", {}) # +comm # return layer_data.get('comp_time', 0.0) - replica_stage = "prefill" #TODO stage + replica_stage = "prefill" # TODO(tianhao909): determine stage from runtime context - # 单位从ns转换为s - # Convert unit from ns to s + # Convert unit from ns to s / 单位从ns转换为s moe_comp_time = layer_data.get('comp_time', 0.0) * 1e-9 - # 单位Byte - # Unit: Byte + # Unit: Byte / 单位Byte moe_comm_size = layer_data.get('comm_size', 0.0) if replica_stage == "prefill": # normal kernel - # Gbps换算成 Byte/s - # Convert Gbps to Byte/s + # Convert Gbps to Byte/s 
/ Gbps换算成 Byte/s cur_bw = self._replica_config.rdma_bandwidth * 1024 * 1024 * 1024 / 8 elif replica_stage == "decode": # low_latency kernel - # Gbps换算成 Byte/s - # Convert Gbps to Byte/s + # Convert Gbps to Byte/s / Gbps换算成 Byte/s cur_bw = self._replica_config.nvlink_bandwidth * 1024 * 1024 * 1024 / 8 moe_comm_time = moe_comm_size / cur_bw # 秒 moe_time = moe_comp_time + moe_comm_time - # print(f"> debug layer_id={layer_id} moe_time={moe_time} us moe_comp_time={moe_comp_time} us moe_comm_time={moe_comm_time}") - return moe_time + return moe_time if moe_time > 0 else 1e-6 def _get_aicb_params(self): + """ + Get AICB invocation parameters. + 获取 AICB 调用参数 + + Automatically reads per-phase TP/PP/WS/EP from replica_config. + These values are set correctly per phase in base_execution_time_predictor.py. + 自动从 replica_config 读取 per-phase 的 TP/PP/WS/EP + 这些值已在 base_execution_time_predictor.py 中按 phase 正确设置 + + Returns: + (model_name, model_json_file, tp, pp, ws, ep, bs, seq, phase) + """ if self._replica_config.model_name == 'deepseek-671B': model_name = "DeepSeek-671B" model_json_file = "./scripts/inference_configs/deepseek_default.json" @@ -200,10 +623,15 @@ def _get_aicb_params(self): model_name = "Qwen3-Next-80B" model_json_file = "./scripts/inference_configs/qwen3_next_default.json" + # [PD-Aware] These values are set correctly per phase in base_execution_time_predictor.py + # [PD-Aware] 这些值已在 base_execution_time_predictor.py 中按 phase 正确设置 tp = self._replica_config.tensor_parallel_size pp = self._replica_config.num_pipeline_stages ws = self._replica_config.world_size - ep = self._replica_config.expert_model_parallel_size + # ep = self._replica_config.expert_model_parallel_size + ep = self._replica_config.expert_model_parallel_size # [EP Auto] = per-phase world_size + # TODO(tianhao909): use real values for bs/seq/phase alignment + # TODO(tianhao909): 把这几个加进去,用真实的值对齐 bs = self._replica_config.batch_size seq = self._replica_config.seq_len phase = self._replica_config.phase 
@@ -211,105 +639,498 @@ def _get_aicb_params(self): return model_name, model_json_file, tp, pp, ws, ep, bs, seq, phase def _get_aicb_csv_path(self) -> str: - """根据当前配置生成 AICB CSV 的预期路径""" - """Generate expected AICB CSV path based on current configuration""" + """Generate expected AICB CSV path based on current configuration + 根据当前配置生成 AICB CSV 的预期路径""" model_name, _, tp, pp, ws, ep, bs, seq, phase = self._get_aicb_params() - print(f'get aicb csv path: {model_name} world_size{ws}-tp{tp}-pp{pp}-ep{ep}-bs{bs}-seq{seq}-{phase}') + logger.debug(f'get aicb csv path: {model_name} world_size{ws}-tp{tp}-pp{pp}-ep{ep}-bs{bs}-seq{seq}-{phase}') filename = ( f"vidur-{model_name}-world_size{ws}-tp{tp}-pp{pp}-ep{ep}" f"-bs{bs}-seq{seq}-{phase}.csv" ) - return os.path.join("results", "workload", filename) + # Use absolute path based on code file location + # 使用基于代码文件位置的绝对路径 + return str(_AICB_ROOT / "results" / "workload" / filename) def _generate_aicb_csv(self): - # TODO > 加生成的代码 - # TODO > Add generation code - return - model_name,model_json_file, tp, pp, ws, ep, bs, seq, phase = self._get_aicb_params() - cwd="../../../aicb/" - - # TODO sys.executable 这样会使用vidur虚拟环境的python,确保与aicb的协同 - # TODO sys.executable This will use vidur virtual environment's python to ensure coordination with aicb + """Generate AICB CSV file / 生成AICB CSV文件""" + model_name, model_json_file, tp, pp, ws, ep, bs, seq, phase = self._get_aicb_params() + logger.debug(f"_generate_aicb_csv: model_name={model_name} model_json_file={model_json_file} tp={tp} pp={pp} ws={ws} ep={ep} bs={bs} seq={seq} phase={phase}") + # Use absolute path based on code file location + # 使用基于代码文件位置的绝对路径 + cwd = str(_AICB_ROOT) + cwd_path = Path(cwd) + + logger.debug(f'\n{"="*80}') + logger.debug(f'===== AICB CSV Generation Debug Info =====') + logger.debug(f'{"="*80}') + + # Check if AICB directory exists / 检查 AICB 目录是否存在 + if not cwd_path.exists(): + logger.error(f'AICB directory does not exist: {cwd}') + logger.error(f'Please ensure 
AICB is properly installed') + return False + + logger.debug(f'AICB Root Directory: {cwd_path}') + logger.debug(f'AICB Root exists: {cwd_path.exists()}') + + # Check results/workload directory / 检查 results/workload 目录 + results_dir = cwd_path / "results" / "workload" + logger.debug(f'Results directory: {results_dir}') + logger.debug(f'Results directory exists: {results_dir.exists()}') + + # Create directory if not exists / 如果不存在,创建目录 + if not results_dir.exists(): + logger.debug(f'Creating results directory: {results_dir}') + results_dir.mkdir(parents=True, exist_ok=True) + + # List existing files in results/workload directory + # 列出 results/workload 目录下已有的文件 + if results_dir.exists(): + existing_files = list(results_dir.glob('*.csv')) + logger.debug(f'Existing CSV files in results/workload ({len(existing_files)} files):') + for f in existing_files[:10]: + logger.debug(f' - {f.name}') + if len(existing_files) > 10: + logger.debug(f' ... and {len(existing_files) - 10} more files') + + # Build AICB command / 构建AICB命令 cmd = [ - sys.executable, + sys.executable, "-m", "workload_generator.Vidur_workload_generator", - str(model_name), - str(model_json_file), + model_name, + model_json_file, "--seq_length", str(seq), "--micro_batch", str(bs), "--world_size", str(ws), "--tensor_model_parallel_size", str(tp), "--expert_model_parallel_size", str(ep), "--aiob_enable", - "--phase", str(phase), + "--phase", phase, ] - # pp: cmd += ["--pipeline_model_parallel", str(pp)] + + if pp > 1: + cmd.extend(["--pipeline_model_parallel", str(pp)]) - cwd_path = Path(cwd) - print(f'[DEBUG] run aicb cmd: {cmd}') - result = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd_path, text=True) - if result.returncode != 0: - raise RuntimeError(f"Command {cmd} failed with return code {result.returncode}") + # Print command for manual execution / 打印可以手动执行的命令 + cmd_str = " ".join(cmd) + logger.debug(f'\n===== Command Details =====') + logger.debug(f'Working directory: cd {cwd_path}') + 
logger.debug(f'Full command: {cmd_str}') + logger.debug(f'One-liner: cd {cwd_path} && {cmd_str}') + + # Expected output CSV file path / 预期生成的文件路径 + expected_csv = self._get_aicb_csv_path() + logger.debug(f'Expected output CSV file: {expected_csv}') + + try: + logger.debug(f'\n===== Executing Command =====') + result = subprocess.run(cmd, cwd=cwd_path, capture_output=True, text=True, timeout=300) + + logger.debug(f'Return code: {result.returncode}') + if result.stdout.strip(): + logger.debug(f'STDOUT: {result.stdout.strip()}') + if result.stderr.strip(): + logger.debug(f'STDERR: {result.stderr.strip()}') + + # Check results/workload directory after command execution + # 检查命令执行后 results/workload 目录的变化 + logger.debug(f'===== Post-execution Check =====') + if results_dir.exists(): + new_files = list(results_dir.glob('*.csv')) + logger.debug(f'CSV files after execution ({len(new_files)} files)') + + if os.path.exists(expected_csv): + logger.info(f'SUCCESS: Expected CSV file was created!') + else: + logger.warning(f'Expected CSV file was NOT created!') + similar_files = list(results_dir.glob(f'*{model_name}*{phase}*.csv')) + if similar_files: + logger.debug(f'Similar files found: {[f.name for f in similar_files]}') + else: + logger.debug(f'No similar files found matching pattern: *{model_name}*{phase}*.csv') + + if result.returncode != 0: + logger.error(f'AICB command failed with return code {result.returncode}') + return False + else: + logger.debug(f'AICB command succeeded (returncode=0)') + return True + + except subprocess.TimeoutExpired: + logger.error('AICB command timed out after 300 seconds') + return False + except Exception as e: + logger.error(f'Failed to run AICB command: {e}', exc_info=True) + return False + + + def _generate_or_find_bs1_csv( + self, model_name, ws, tp, pp, ep, bs, seq, phase, original_csv_path + ) -> str: + """ + [AICB Safe Mode] Always use bs=1 to generate or find CSV. 
+ [AICB Safe Mode] 始终使用 bs=1 生成或查找 CSV + + Reason: AICB's per_token_group_quant_fp8 CUDA kernel is incompatible + with bs>1 on some GPUs (e.g. H20), causing "CUDA error: invalid + configuration argument". + 原因: AICB 的 per_token_group_quant_fp8 等 CUDA kernel 在某些 GPU + (如 H20) 上对 bs>1 不兼容 + + Strategy / 策略: + 1. If requested bs=1, generate directly / 如果请求的就是 bs=1, 直接生成 + 2. If bs>1, look for existing bs=1 CSV / 如果 bs>1, 查找已有的 bs=1 CSV + 3. If bs=1 CSV missing, generate it / 如果 bs=1 CSV 不存在, 生成它 + + Args: + model_name: Model name / 模型名 + ws, tp, pp, ep: Parallelism config / 并行配置 + bs: Original requested batch size (may be > 1) + seq: Sequence length / 序列长度 + phase: Stage (prefill/decode) / 阶段 + original_csv_path: Original CSV path (may be bs>1) + + Returns: + Path to found or generated CSV (may be bs=1 CSV) + """ + logger.debug(f'[AICB Safe Mode] CSV不存在: {original_csv_path}') + logger.debug(f'[AICB Safe Mode] 请求参数: model={model_name}, bs={bs}, seq={seq}, phase={phase}') + + # ---- Case 1: bs=1 already, generate directly ---- + # ---- 情况1: 本身就是 bs=1, 直接生成 ---- + if bs == 1: + logger.debug(f'[AICB Safe Mode] bs=1, 直接生成...') + _GLOBAL_AICB_CACHE.record_aicb_call() + if self._generate_aicb_csv(): + if os.path.exists(original_csv_path): + logger.info(f'[AICB Safe Mode] 成功生成 bs=1 CSV: {original_csv_path}') + return original_csv_path + else: + logger.warning('[AICB Safe Mode] 生成后未找到 CSV (文件名可能不匹配)') + else: + logger.warning('[AICB Safe Mode] bs=1 生成失败') + return original_csv_path + + # ---- Case 2: bs > 1, skip original bs, use bs=1 instead ---- + # ---- 情况2: bs > 1, 跳过原始 bs, 直接使用 bs=1 ---- + logger.debug(f'[AICB Safe Mode] bs={bs} > 1, 跳过原始bs (避免CUDA kernel错误), 使用 bs=1') + + # Temporarily switch to bs=1 to get the bs=1 CSV path + # 临时切换到 bs=1 以获取 bs=1 的 CSV 路径 + original_bs = self._replica_config.batch_size + self._replica_config.batch_size = 1 + bs1_csv_path = self._get_aicb_csv_path() + self._replica_config.batch_size = original_bs # Restore immediately / 立即恢复 + + 
logger.debug(f'[AICB Safe Mode] bs=1 CSV 路径: {bs1_csv_path}') + + # Check if bs=1 CSV already exists + # 检查 bs=1 CSV 是否已存在 + if os.path.exists(bs1_csv_path): + logger.debug('[AICB Safe Mode] 找到已有 bs=1 CSV (无需生成)') + return bs1_csv_path + + # bs=1 CSV doesn't exist, generate it + # bs=1 CSV 不存在, 生成它 + logger.debug('[AICB Safe Mode] bs=1 CSV 不存在, 开始生成...') + original_bs = self._replica_config.batch_size + self._replica_config.batch_size = 1 + + _GLOBAL_AICB_CACHE.record_aicb_call() + gen_ok = self._generate_aicb_csv() + + self._replica_config.batch_size = original_bs # Restore / 恢复 + + if gen_ok and os.path.exists(bs1_csv_path): + logger.info(f'[AICB Safe Mode] 成功生成 bs=1 CSV: {bs1_csv_path}') + return bs1_csv_path + else: + logger.warning('[AICB Safe Mode] bs=1 生成失败或未找到 CSV') + return original_csv_path # 返回原路径, 后续 fallback 逻辑会处理 def _load_aicb_data(self) -> Dict[int, Dict[str, Dict[str, float]]]: - """加载 CSV,返回 {layer_id: {layer_name: {comp_time: value, comm_size: value}}}""" - """Load CSV, returning {layer_id: {layer_name: {comp_time: value, comm_size: value}}}""" + """ + [AICB Optimization B+C] Load AICB data, preferring global cache and interpolation. + [AICB优化 B+C方案] 加载AICB数据,优先使用全局缓存和插值 + + Lookup flow / 查找流程: + 1. Check global cache exact match (Plan C: lookup) + 检查全局缓存精确匹配 (方案C: 查表) + 2. Check global cache interpolation (Plan B: head-tail token strategy) + 检查全局缓存插值 (方案B: 首尾token策略) + 3. 
If both miss, read/generate CSV and cache + 如果都没有,读取/生成CSV并缓存 + + Returns: + {layer_id: {layer_name: {comp_time: value, comm_size: value}}} + """ + global _GLOBAL_AICB_CACHE + if self._aicb_data is not None: return self._aicb_data + # Get current parameters / 获取当前参数 + model_name, _, tp, pp, ws, ep, bs, seq, phase = self._get_aicb_params() + + # === Step 1+2: Try global cache (exact match or interpolation) === + # === 步骤1+2: 尝试从全局缓存获取 (精确匹配 或 插值) === + cached_data = _GLOBAL_AICB_CACHE.get(model_name, ws, tp, pp, ep, bs, seq, phase) + if cached_data is not None: + self._aicb_data = cached_data + # [Head-tail interp] On cache hit, also ensure last_seq is preloaded + # [首尾插值] 缓存命中时,也确保last_seq已预加载 + # So even old cache without last_seq can be supplemented + # 这样即使旧缓存中没有last_seq,也能补充加载 + self._ensure_decode_endpoint_preloaded(phase, seq) + return cached_data + + # === Step 3: Cache miss, need to read/generate CSV === + # === 步骤3: 缓存未命中,需要读取/生成CSV === + print(f"[AICB] Cache miss, loading CSV (缓存未命中,需要加载CSV): " + f"model={model_name}, bs={bs}, seq={seq}, phase={phase}") + csv_path = self._get_aicb_csv_path() - full_csv_path = os.path.join("../../../aicb/results/workload/", csv_path) + full_csv_path = csv_path if not os.path.exists(full_csv_path): + # ============================================================ + # [AICB Safe Mode] Always use bs=1 to generate/find CSV + # [AICB Safe Mode] 始终使用 bs=1 生成/查找 CSV + # + # Reason: AICB's per_token_group_quant_fp8 CUDA kernel + # is incompatible with bs>1 on some GPUs (e.g. H20) + # 原因: AICB 的 per_token_group_quant_fp8 等 CUDA kernel + # 在某些 GPU (如 H20) 上对 bs>1 不兼容 + # + # Strategy: Use bs=1 CSV because: + # 策略: 直接使用 bs=1 生成 CSV, 因为: + # 1. AICB measures per-token CUDA kernel time + # AICB 测量的是 per-token CUDA kernel 耗时 + # 2. Per-token time is largely independent of batch size + # 单 token 耗时与 batch size 基本无关 + # 3. 
bs=1 has simplest kernel config, best GPU compatibility + # bs=1 的 kernel 配置最简单, GPU 兼容性最好 + # ============================================================ + full_csv_path = self._generate_or_find_bs1_csv( + model_name, ws, tp, pp, ep, bs, seq, phase, full_csv_path + ) - # TODO > 加生成的代码 - # TODO > Add generated code - self._generate_aicb_csv() if not os.path.exists(full_csv_path): - print(f'[DEBUG] still not exists {full_csv_path}') - full_csv_path = '../aicb/results/workload/vidur-DeepSeek-671B-world_size32-tp1-pp1-ep32-bs4-seq4096-decode.csv' + # ============================================================ + # [AICB Fallback] Search for existing CSV of the same model + # in results/workload/ directory + # [AICB Fallback] 在 results/workload/ 目录搜索同模型的已有CSV + # + # Search strategy (by priority) / 搜索策略 (按优先级): + # 1. Same model + same ws + same phase (different bs/seq) + # 同模型 + 同ws + 同phase (不同 bs/seq) + # 2. Same model + same phase (different ws/bs/seq) + # 同模型 + 同phase (不同 ws/bs/seq) + # 3. 
Any model's fallback CSV + # 任意模型的兜底 CSV + # ============================================================ + import glob + search_dir = os.path.dirname(full_csv_path) + + # Get correct model name (from _get_aicb_params, already proper case) + # 获取正确的模型名 + found_fallback = False + + # Priority 1: same model + same ws + same phase + # 优先级1: 同模型 + 同ws + 同phase + pattern1 = os.path.join(search_dir, + f"vidur-{model_name}-world_size{ws}-tp{tp}-pp{pp}-ep{ep}-bs*-seq*-{phase}.csv") + matches1 = sorted(glob.glob(pattern1)) + if matches1: + full_csv_path = matches1[0] + logger.info(f'[AICB Fallback] 找到同模型同ws: {full_csv_path}') + found_fallback = True + + # Priority 2: same model + same phase (any ws/ep) + # 优先级2: 同模型 + 同phase (任意 ws/ep) + if not found_fallback: + pattern2 = os.path.join(search_dir, f"vidur-{model_name}-*-{phase}.csv") + matches2 = sorted(glob.glob(pattern2)) + if matches2: + full_csv_path = matches2[0] + logger.info(f'[AICB Fallback] 找到同模型: {full_csv_path}') + found_fallback = True + + # Priority 3: any CSV as fallback + # 优先级3: 任意 CSV 兜底 + if not found_fallback: + all_csvs = sorted(glob.glob(os.path.join(search_dir, "vidur-*.csv"))) + if all_csvs: + full_csv_path = all_csvs[0] + logger.info(f'[AICB Fallback] 使用任意可用CSV: {full_csv_path}') + found_fallback = True + + if not found_fallback: + logger.error('无法找到任何AICB CSV文件') + return {} + + # === Parse CSV === + # === 解析CSV === + _GLOBAL_AICB_CACHE.record_csv_load() + data = self._parse_aicb_csv(full_csv_path) + + if data: + # Store into global cache / 存入全局缓存 + _GLOBAL_AICB_CACHE.put(model_name, ws, tp, pp, ep, bs, seq, phase, data) + + # Also copy CSV to aicb_workload/cache dir for inspection + # 同时复制CSV到aicb_workload/cache目录,方便查看 + try: + import shutil + cache_csv = _AICB_CACHE_DIR / os.path.basename(full_csv_path) + if not cache_csv.exists(): + shutil.copy2(full_csv_path, cache_csv) + except: + pass + + # ============================================================ + # [Head-tail interp optimization] Preload 
AICB data for decode's last round + # [首尾插值优化] 预加载decode最后一轮的AICB数据 + # ============================================================ + self._ensure_decode_endpoint_preloaded(phase, seq) + + self._aicb_data = data + return data + def _ensure_decode_endpoint_preloaded(self, phase: str, current_seq: int): + """ + [Head-tail interp] Ensure AICB data for decode's last round is preloaded. + [首尾插值] 确保decode最后一轮的AICB数据已预加载 + + Called from both cache-hit and cache-miss paths. + _preload_decode_endpoint internally checks cache, returns immediately if loaded. + 无论是缓存命中还是缓存未命中路径都会调用此方法。 + + Args: + phase: Current stage ("prefill" or "decode") / 当前阶段 + current_seq: Current iteration's seq value / 当前迭代的seq值 + """ + if phase == "decode" and hasattr(self._replica_config, 'decode_last_seq'): + last_seq = self._replica_config.decode_last_seq + if last_seq is not None and last_seq != current_seq: + self._preload_decode_endpoint(last_seq) - # 解析 CSV:按 layer_id 和 layer_name 分组存储所有数据 - # Parsing CSV: Group and store all data by layer_id and layer_name + + def _preload_decode_endpoint(self, last_seq: int): + """ + [Head-tail interp optimization] Preload AICB data for decode's last round. + [首尾插值优化] 预加载decode最后一轮的AICB数据 + + Purpose: When first loading decode round 1 CSV, also load the last round's + seq CSV so intermediate iterations can get more accurate execution times + via linear interpolation. 
+ 目的: 在首次加载decode第一轮CSV时,同时加载最后一轮的seq对应的CSV + + Principle / 原理: + - KV Cache grows during decode, so computation changes with seq + Transformer推理中,decode阶段的KV Cache随seq增长 + - Using only round 1 data (nearest neighbor) ignores this growth trend + 只用第一轮数据(最近邻)会忽略这种增长趋势 + - Head-tail interpolation captures the linear growth + 首尾两点线性插值可以捕捉这种线性增长 + + Args: + last_seq: Seq_len value of the last decode round / 最后一轮decode的seq_len值 + """ + global _GLOBAL_AICB_CACHE, _FAILED_PRELOAD_KEYS + + model_name, _, tp, pp, ws, ep, bs, _, phase = self._get_aicb_params() + + # Note: Must use exact match check, not get() (which triggers nearest neighbor/interpolation) + # 注意: 必须用精确匹配检查,不能用 get() (它会触发最近邻/插值) + # Otherwise seq=106 would match seq=100 via nearest neighbor, skipping real CSV load + # 否则 seq=106 会被最近邻匹配到 seq=100 的数据,跳过真正的CSV加载 + exact_key = _GLOBAL_AICB_CACHE._make_key(model_name, ws, tp, pp, ep, bs, last_seq, phase) + if exact_key in _GLOBAL_AICB_CACHE._cache: + # Already has real data, no need to preload + # 已有真实数据,无需预加载 + return + + # Avoid repeated attempts on failed preloads + # 避免重复尝试已失败的预加载 + if exact_key in _FAILED_PRELOAD_KEYS: + return + + logger.info(f"[AICB] Preloading decode last round (开始预加载decode最后一轮): " + f"model={model_name}, bs={bs}, seq={last_seq}, phase={phase}") + + # Temporarily modify seq_len to generate corresponding CSV path + # 临时修改seq_len以生成对应的CSV路径 + original_seq = self._replica_config.seq_len + self._replica_config.seq_len = last_seq + + csv_path = self._get_aicb_csv_path() + + if not os.path.exists(csv_path): + # ============================================================ + # [AICB Safe Mode] Use bs=1, avoid CUDA kernel compatibility issues + # [AICB Safe Mode] 使用 bs=1 生成, 避免 CUDA kernel 兼容性问题 + # ============================================================ + logger.debug(f"[AICB首尾插值] last_seq CSV不存在: {csv_path}") + csv_path = self._generate_or_find_bs1_csv( + model_name, ws, tp, pp, ep, bs, last_seq, phase, csv_path + ) + + if 
os.path.exists(csv_path): + _GLOBAL_AICB_CACHE.record_csv_load() + data = self._parse_aicb_csv(csv_path) + if data: + _GLOBAL_AICB_CACHE.put(model_name, ws, tp, pp, ep, bs, last_seq, phase, data) + logger.info(f"[AICB首尾插值] 成功缓存last_seq={last_seq}, layers={len(data)}") + + # Copy CSV to cache directory / 复制CSV到缓存目录 + try: + import shutil + cache_csv = _AICB_CACHE_DIR / os.path.basename(csv_path) + if not cache_csv.exists(): + shutil.copy2(csv_path, cache_csv) + except: + pass + else: + logger.warning("[AICB首尾插值] last_seq CSV解析为空") + _FAILED_PRELOAD_KEYS.add(exact_key) + else: + logger.warning(f"[AICB首尾插值] 生成后仍未找到last_seq CSV: {csv_path}") + _FAILED_PRELOAD_KEYS.add(exact_key) + + # Restore original seq_len / 恢复原始seq_len + self._replica_config.seq_len = original_seq + + def _parse_aicb_csv(self, csv_path: str) -> Dict[int, Dict[str, Dict[str, float]]]: + """ + Parse AICB CSV file and return structured data. + 解析AICB CSV文件,返回结构化数据 + + Extracted from _load_aicb_data for clean separation. + 从 _load_aicb_data 中提取的CSV解析逻辑。 + + Returns: + {layer_id: {layer_name: {comp_time: float, comm_size: float}}} + """ data: Dict[int, Dict[str, Dict[str, float]]] = {} try: - with open(full_csv_path, newline='') as f: - # 检查文件内容 - # Check File Content - - # content = f.read(1000) # Read the first 1000 characters - # print(f"> debug Read the first 1000 characters: {repr(content)}") - # f.seek(0) # Reset the file pointer - - # 使用制表符作为分隔符,因为这是TSV文件 - # Use tabs as delimiters because this is a TSV file. 
+ with open(csv_path, newline='') as f: reader = csv.DictReader(f, delimiter='\t') - print(f"> debug CSV列名: {reader.fieldnames}") + logger.debug(f"[AICB优化] CSV列名: {reader.fieldnames}") - # 检查是否正确解析了列名 - # Check if column names were parsed correctly if reader.fieldnames and len(reader.fieldnames) == 1: - # 如果列名没有正确分割,尝试手动分割 - # If column names weren't split correctly, try manual splitting actual_fieldnames = reader.fieldnames[0].split('\t') if 'layer_id' in actual_fieldnames and 'layer_name' in actual_fieldnames: - print("> debug Detected tab-separated column names, reprocessing") f.seek(0) lines = f.readlines() - # 手动解析 - # Manual parsing headers = lines[0].strip().split('\t') - print(f"> debug Parsed column names manually:: {headers}") for line_num, line in enumerate(lines[1:], 1): values = line.strip().split('\t') if len(values) == len(headers): row = dict(zip(headers, values)) - # print(f"> debug Row {row_num} data: {row}") - layer_id = int(row['layer_id']) layer_name = row['layer_name'] comp_time = float(row['comp_time']) @@ -321,27 +1142,15 @@ def _load_aicb_data(self) -> Dict[int, Dict[str, Dict[str, float]]]: 'comp_time': comp_time, 'comm_size': comm_size } - print("> debug Manual parsing completed") else: - print("> debug Failed to parse column names correctly") return {} else: - # 正常的CSV解析流程 - # Normal CSV parsing process for row_num, row in enumerate(reader, 1): - # print(f"> debug Row {row_num} data: {row}") - - # 检查必要的键是否存在 - # Check if required keys exist - if 'layer_id' not in row or 'layer_name' not in row or 'comp_time' not in row or 'comm_size' not in row: - print(f"Warning: Row {row_num} missing required columns, skipping") + if 'layer_id' not in row or 'layer_name' not in row: continue - + layer_id = int(row['layer_id']) layer_name = row['layer_name'] - - # 单位:微秒(根据示例) - # Unit: microseconds (based on example) comp_time = float(row['comp_time']) comm_size = float(row['comm_size']) @@ -352,16 +1161,12 @@ def _load_aicb_data(self) -> Dict[int, 
Dict[str, Dict[str, float]]]: 'comm_size': comm_size } except Exception as e: - print(f"Error reading CSV file: {e}") - import traceback - traceback.print_exc() + logger.error(f"读取CSV文件失败: {e}", exc_info=True) return {} - self._aicb_data = data - # print(f"> debug Successfully loaded data: {data}") + logger.info(f"[AICB优化] 成功解析CSV: {len(data)} layers from {csv_path}") return data - def _get_block_execution_time(self) -> float: return ( self._get_attention_layer_execution_time() @@ -375,18 +1180,10 @@ def _get_block_execution_time_by_layer_id(self, layer_id: int = 0) -> float: # 根据模型类型确定使用的层类型 # Determine layer type based on model - # if self._replica_config.model_name in ['qwen3-moe-235B']: - # layer_time = self._get_moe_layer_execution_time_from_aicb(layer_id) - # else: - # layer_time = self._get_mlp_layer_execution_time_from_aicb(layer_id) - - # assert att_time >= 0 and layer_time >= 0, f"> debug" - # return att_time + layer_time - att_time = self._get_attention_layer_execution_time_from_aicb(layer_id) mlp_time = self._get_mlp_layer_execution_time_from_aicb(layer_id) moe_time = self._get_moe_layer_execution_time_from_aicb(layer_id) - assert att_time >=0 and mlp_time>=0 and moe_time >= 0, f"> debug" + assert att_time >= 0 and mlp_time >= 0 and moe_time >= 0, "AICB layer times must be non-negative" return att_time + mlp_time + moe_time else: @@ -498,8 +1295,8 @@ def model_time(self) -> float: # 计算当前 pipeline stage 包含的 layer_id 范围 # Calculate the range of layer_ids included in the current pipeline stage - # > TODO: 找_pipeline_stage_id 在哪, 结合batch id - # > TODO: Find where _pipeline_stage_id is defined and integrate with batch id + # TODO(tianhao909): locate _pipeline_stage_id and integrate with batch id + # TODO(tianhao909): 找 _pipeline_stage_id 在哪,结合 batch id if self._replica_config.num_pipeline_stages == 1: self._pipeline_stage_id = 0 start_layer = self._pipeline_stage_id * self._num_layers_per_pipeline_stage diff --git 
a/vidur-alibabacloud/vidur/entities/replica.py b/vidur-alibabacloud/vidur/entities/replica.py index bfb29ce4..412dafb7 100644 --- a/vidur-alibabacloud/vidur/entities/replica.py +++ b/vidur-alibabacloud/vidur/entities/replica.py @@ -1,4 +1,5 @@ from math import ceil +from typing import Tuple from vidur.config import BaseRequestGeneratorConfig, ReplicaConfig from vidur.entities.base_entity import BaseEntity @@ -15,8 +16,8 @@ class ReplicaType(IntEnum): # Define task type enumeration class, inheriting fr DECODE = 2 # Token task (generation stage) -# Replica是一个模型实体,即一个DP单位 # Replica represents a model entity, which is a Data Parallelism (DP) unit +# Replica是一个模型实体,即一个DP单位 class Replica(BaseEntity): def __init__( self, @@ -39,12 +40,12 @@ def __init__( == 0 ) - # > sw - # TODO > Decouple this from replica, as vidur itself is decoupled from it + # TODO(tianhao909): decouple pending_requests from replica + # TODO(tianhao909): 将 pending_requests 从 replica 中解耦 # self._pending_requests = [] self.pending_requests = [] self._pending_tasks = [] - # > scheduler metadata + # Scheduler metadata / 调度器元数据 # self.sched_memory = self.model.size.total_size # Memory usage from scheduler's perspective self.sched_memory = self._device_config.total_memory_gb self.sched_pending_tokens = 0 # Number of pending tokens from scheduler's perspective @@ -61,6 +62,13 @@ def __init__( self.pd_node_ratio = self._replica_config.pd_node_ratio self.nvlink_bandwidth = self._replica_config.nvlink_bandwidth self.rdma_bandwidth = self._replica_config.rdma_bandwidth + + # New variables: track KV cache memory usage + # 新增变量:跟踪kvcache显存使用情况 + self._allocated_kv_cache_memory = 0 # Allocated KV cache memory (bytes) / 已分配的kvcache显存 + self._max_kv_cache_memory = None # Max KV cache capacity (bytes) / 最大kvcache显存容量 + self._kv_cache_allocation_map = {} # Track per-request KV cache allocation / 跟踪每个请求分配的kvcache大小 + @property def id(self) -> int: @@ -146,7 +154,169 @@ def per_device_flops(self) -> float: @property def 
pending_tasks(self) -> list: return self._pending_tasks + + def get_kv_cache_per_token(self) -> int: + """ + Calculate per-token KV Cache size (unit: Bytes). + 计算每个token的KV Cache大小 (单位: Bytes) + + Formula / 公式: 2 * num_kv_heads * head_dim * num_layers * bytes_per_element + + Returns: + int: Per-token KV Cache size (Bytes) + """ + # Determine bytes per element / 确定每个元素的字节数 + dtype_to_bytes = { + 'float16': 2, 'bfloat16': 2, + 'float32': 4, 'float64': 8, + 'fp8': 1, 'int8': 1, + 'int16': 2, 'int32': 4, 'int64': 8 + } + bytes_per_element = dtype_to_bytes.get(self.pd_p2p_comm_dtype, 2) + + # KV Cache size per token / KV Cache每 token的大小 + kv_cache_per_token = ( + 2 # K和V两个缓存 + * self.num_kv_heads # KV heads数量 + * self.attention_head_dim # 每个head的维度 + * self.num_layers # 层数 + * bytes_per_element # 每个元素的字节数 + ) + return kv_cache_per_token + + def get_remaining_kv_cache_capacity(self, avg_tokens_per_request=None) -> Tuple[int, int]: + """ + Calculate remaining KV cache memory capacity and how many requests it can serve. + 计算当前副本剩余的kvcache显存容量,以及还能容纳多少个request + + Args: + avg_tokens_per_request: Avg tokens per request (default: max_request_tokens) + 每个请求的平均token数 + + Returns: + (remaining_kv_cache_bytes, remaining_request_capacity) + """ + from vidur.scheduler.utils.memory_planner import MemoryPlanner + memory_planner = MemoryPlanner(self._replica_config, self) + + # ===== 1. Init max KV cache capacity (computed on first call) ===== + # ===== 1. 
初始化最大kvcache容量 (首次调用时计算) ===== + if self._max_kv_cache_memory is None: + # Get real KV cache available memory (bytes) from memory_planner + # 直接从 memory_planner 获取真实的 KV cache 可用内存 (bytes) + # Correct calculation: available memory - model parameter memory + # 这是正确的计算: 可用内存 - 模型参数内存 + self._max_kv_cache_memory = memory_planner.get_kv_cache_available_memory() + + # Compute per-request KV cache for display + # 计算每请求 KV cache 用于显示 + kv_cache_per_token = self.get_kv_cache_per_token() + tokens_per_req = avg_tokens_per_request or self.max_request_tokens + kv_cache_per_request = kv_cache_per_token * tokens_per_req + max_requests = int(self._max_kv_cache_memory / kv_cache_per_request) if kv_cache_per_request > 0 else 0 + + logger.info(f"[Replica] KV Cache Capacity Init (KV Cache容量初始化):") + logger.info(f" Total GPU mem (GPU总内存): {self.total_memory_gb:.2f} GB") + logger.info(f" Mem margin (内存保留比例): {self.memory_margin_fraction*100:.1f}%") + logger.info(f" Max KV cache capacity (最大KV cache容量): {self._max_kv_cache_memory/(1024**3):.2f} GB") + logger.info(f" KV cache per token (每token KV cache): {kv_cache_per_token} bytes = {kv_cache_per_token/1024:.2f} KB") + logger.info(f" Avg tokens per req (每请求平均token数): {tokens_per_req}") + logger.info(f" KV cache per req (每请求KV cache): {kv_cache_per_request/(1024**3):.4f} GB") + logger.info(f" Max servable reqs (最大可服务请求数): {max_requests}") + + # ===== 2. Compute remaining KV cache memory ===== + # ===== 2. 计算剩余kvcache显存 ===== + remaining_kv_cache = self._max_kv_cache_memory - self._allocated_kv_cache_memory + + # ===== 3. Compute remaining request capacity ===== + # ===== 3. 
计算剩余容量可服务的请求数 ===== + # Unified calculation: per-token KV cache * avg tokens per request + # 使用统一的计算方式 + kv_cache_per_token = self.get_kv_cache_per_token() + tokens_per_req = avg_tokens_per_request or self.max_request_tokens + kv_cache_per_request = kv_cache_per_token * tokens_per_req + + if kv_cache_per_request > 0: + remaining_request_capacity = int(remaining_kv_cache / kv_cache_per_request) + else: + remaining_request_capacity = 0 + + # ===== 4. Print debug info ===== + # ===== 4. 打印调试信息 ===== + logger.debug(f"Remaining KV cache: {remaining_kv_cache / (1024**3):.2f} GB ({remaining_kv_cache / (1024**2):.2f} MB)") + logger.debug(f"Per-request KV cache: {kv_cache_per_request/(1024**3):.4f} GB ({tokens_per_req} tokens)") + logger.debug(f"Remaining request capacity: {remaining_request_capacity}") + + return remaining_kv_cache, remaining_request_capacity + + def release_request_kv_cache_memory(self, request) -> None: + """ + Release KV cache memory occupied by the specified request. + 释放指定request占用的kvcache显存 + """ + # Get KV cache size occupied by this request from allocation map + # 从分配映射中获取这个request占用的kvcache大小 + if request.id in self._kv_cache_allocation_map: + kv_cache_size = self._kv_cache_allocation_map[request.id] + assert kv_cache_size > 0, f"fth debug: request {request.id} kv cache size should be positive" + + # Subtract this request's KV cache from allocated total + # 从已分配的kvcache中减去这个request的占用 + self._allocated_kv_cache_memory = max(0, self._allocated_kv_cache_memory - kv_cache_size) + + # Remove this request from allocation map + # 从分配映射中移除这个请求 + del self._kv_cache_allocation_map[request.id] + + logger.debug(f"Released KV cache for request {request.id}: {kv_cache_size / (1024**3):.2f} GB ({kv_cache_size / (1024**2):.2f} MB)") + else: + logger.warning(f"Request {request.id} not found in KV cache allocation map") + + # def allocate_request_kv_cache_memory(self, request, num_blocks): + # """ + # 为指定request分配kvcache显存,使用类似_allocation_map的方式跟踪每个请求的分配情况 + # 
根据分配的块数来计算kvcache大小 + # """ + # # 根据分配的块数计算这个request占用的kvcache大小 + # kv_cache_size = request.estimate_kv_cache_size(num_blocks, self) + def allocate_request_kv_cache_memory(self, request, num_blocks, block_size) -> None: + """ + Allocate KV cache memory for a request, tracking per-request allocation. + 为指定request分配kvcache显存,跟踪每个请求的分配情况 + + Previously num_blocks was passed directly as num_tokens, + causing KV cache tracking to be underestimated by block_size times. + Now correctly converts num_blocks * block_size to num_tokens. + 之前 num_blocks 直接作为 num_tokens 传入,导致跟踪量被低估 block_size 倍。 + + Args: + request: Request object / 请求对象 + num_blocks: Number of allocated memory blocks / 分配的内存块数 + block_size: Tokens per block / 每个块包含的token数 + """ + # Correct conversion: num_tokens = num_blocks * block_size + # 正确转换 + num_tokens = num_blocks * block_size + kv_cache_size = request.estimate_kv_cache_size(num_tokens, self) + logger.debug(f"allocate_request_kv_cache_memory: " + f"req={request.id}, num_blocks={num_blocks}, block_size={block_size}, " + f"num_tokens={num_tokens}, kv_cache_size={kv_cache_size/(1024**2):.2f} MB") + + # Update allocation map / 更新分配映射 + if request.id not in self._kv_cache_allocation_map: + self._kv_cache_allocation_map[request.id] = kv_cache_size + else: + # If already allocated, accumulate (for incremental allocation) + # 如果已有分配,则累加 + self._kv_cache_allocation_map[request.id] += kv_cache_size + + # Increase allocated KV cache / 增加已分配的kvcache + self._allocated_kv_cache_memory += kv_cache_size + + logger.debug(f"Allocated KV cache for request {request.id}: {kv_cache_size / (1024**3):.2f} GB, " + f"total allocated: {self._allocated_kv_cache_memory / (1024**3):.2f} GB") + def to_dict(self) -> dict: return { "id": self.id, @@ -162,7 +332,7 @@ def to_dict(self) -> dict: } - def add_to_pool(self, task): + def add_to_pool(self, task) -> None: """ Add a Task to the request pool. Request pool is ordered by request arrival time. 
@@ -171,7 +341,7 @@ def add_to_pool(self, task): # bisect.insort(): Uses binary search algorithm to insert element into sorted list, maintaining list's sorted state # self.pending_requests: Target list storing all pending requests # task.request: Request object to be inserted - # key=lambda x: x.arrival_timestamp: Sort key function, sorting by request arrival timestamp + # key=lambda x: x.arrival_timestamp: Sort key function, sorting by request arrival time # lambda x: x.arrival_timestamp is an anonymous function that accepts a parameter x (request object) and returns its arrival_timestamp attribute # This ensures the pending_requests list is always sorted by request arrival time if task.request not in self.pending_requests: # If request is not in current pool diff --git a/vidur-alibabacloud/vidur/entities/request.py b/vidur-alibabacloud/vidur/entities/request.py index d528f14a..46d2e2a0 100644 --- a/vidur-alibabacloud/vidur/entities/request.py +++ b/vidur-alibabacloud/vidur/entities/request.py @@ -3,7 +3,6 @@ from vidur.entities.base_entity import BaseEntity from vidur.logger import init_logger -# > from vidur.entities.task import Task import networkx as nx from vidur.entities.flow import Flow @@ -69,8 +68,7 @@ def __init__( self._num_restarts = 0 - # >: Add DAG property - # self.dag: nx.DiGraph = field(default_factory=nx.DiGraph) + # DAG property for PD separation self.dag = nx.DiGraph() self.node_id = 0 self.nodes = {} @@ -91,7 +89,7 @@ def __init__( self.pd_p2p_bytes_per_token = None self.pd_p2p_comm_dtype = None - # > add: Convenient for obtaining the replica corresponding to decode_replica_id through global_scheduler + # Reference to global_scheduler for obtaining decode replica self.global_scheduler = None @@ -265,15 +263,6 @@ def on_batch_end( # Absolute time self._latest_iteration_completed_at = time - # if self._num_processed_tokens == self.total_tokens: - # print(f"> Debug: ") - - # print(f"> Debug: req on_batch_end Request {self._id} processed 
{num_tokens_processed} tokens, \ - # total processed {self._num_processed_tokens} tokens.") - # print(f"> Debug: req on_batch_end num_processed_tokens={self._num_processed_tokens}, \ - # total_tokens={self.total_tokens} request_type={self.request_type}") - # print(f"> Debug: req on_batch_end At time={time}, \ - # this request's self._completed_at={self._completed_at} self._completed={self._completed}") assert self._num_processed_tokens <= self.total_tokens @@ -282,40 +271,28 @@ def on_batch_end( if self._num_processed_tokens == self._num_prefill_tokens: self._is_prefill_complete = True - # > self.request_type = RequestType.DECODE # we get one decode token when the prefill processing completes self._num_processed_tokens += 1 - # print(f"> Debug: self._num_processed_tokens += 1 \ - # Request {self._id} processed {num_tokens_processed} tokens, \ - # total processed {self._num_processed_tokens} tokens") # we must record the prefill completion time only in the first time # in the subsequent restarts, we keep adding the previously decoded # tokens to the prefill tokens - that is irrelevant to the original prefill if self._prefill_completed_at == 0: - # > At this point it is absolute time, + # Record absolute time of prefill completion self._prefill_completed_at = time # Here; decode batching # elif self._num_processed_tokens == self._num_prefill_tokens: elif self._num_processed_tokens > self._num_prefill_tokens : - # > - assert self._is_prefill_complete == True, "> debug" - assert self.request_type == RequestType.DECODE, "> debug" - - # we get one decode token when the prefill processing completes - # self._num_processed_tokens += 1 - # print(f"> Debug: Request {self._id} at this point _num_processed_tokens > _num_prefill_tokens, \ - # total processed {self._num_processed_tokens} tokens") + assert self._is_prefill_complete == True, "prefill must be complete at this point" + assert self.request_type == RequestType.DECODE, "request type must be DECODE at this point" elif 
self._num_processed_tokens < self._num_prefill_tokens: - # print(f"> Debug: Request {self._id} at this point _num_processed_tokens < _num_prefill_tokens, \ - # total processed {self._num_processed_tokens} tokens") pass # check if request is completed @@ -323,18 +300,15 @@ def on_batch_end( self._completed_at = time self._completed = True self.decode_time = self._completed_at - self.prefill_completed_at - assert self.decode_time > 0 and self.decode_time < float("inf") , "> Debug: decode time error" - # print(f"> Debug: At this point the request should end!!, \ - # Request {self._id} completed at {self._completed_at} ") + assert self.decode_time > 0 and self.decode_time < float("inf"), "decode_time must be positive and finite" logger.debug(f"Request {self._id} completed at {self._completed_at}") if self._num_processed_tokens >= self._num_prefill_tokens: - # print(f"> Debug: request ID={self._id} self.decode_arrived_at={self.decode_arrived_at} self.request_type={self.request_type} self.prefill_completed_at={self.prefill_completed_at} self._is_prefill_complete={self._is_prefill_complete}") - # assert self.decode_arrived_at < float("inf") and self.request_type == RequestType.DECODE and self.prefill_completed_at > 0 and self._is_prefill_complete == True, "> debug" - assert self.request_type == RequestType.DECODE and self.prefill_completed_at > 0 and self._is_prefill_complete == True, "> debug" + assert self.request_type == RequestType.DECODE and self.prefill_completed_at > 0 and self._is_prefill_complete == True, \ + "post-prefill request must be DECODE with valid prefill_completed_at" @@ -346,8 +320,7 @@ def on_batch_stage_schedule( if self._latest_stage_completed_at == 0: self._preempted_time = 0 else: - # TODO > fy test each time - # print(f"> Debug: request_id={self._id} time={time} self._latest_stage_completed_a={self._latest_stage_completed_at}") + # TODO: verify preempted_time calculation each iteration self._preempted_time += time - 
self._latest_stage_completed_at self._preempted = False @@ -402,16 +375,11 @@ def restart(self): self._num_restarts += 1 - # > def create_task(self, task_type, **kwargs): """ Creates a Task and adds it to the DAG. """ - # task = Task.from_type(task_type=task_type, - # node_id=next(self.node_id), - # request=self, - # **kwargs) task = Task.from_type(task_type=task_type, node_id=self.node_id, request=self, @@ -419,19 +387,12 @@ def create_task(self, task_type, **kwargs): self.node_id += 1 self.dag.add_node(task) self.nodes[task.node_id] = task - # print(f"> self.dag={self.dag} self.nodes={self.nodes}") - # print(f"> self.dag={self.dag} ") - # import pdb; pdb.set_trace() # > return task def create_flow(self, flow_type, **kwargs): """ Create a flow and add it to the DAG. """ - # flow = Flow.from_type(flow_type=flow_type, - # node_id=next(self.node_id), # Generate unique node ID - # request=self, - # **kwargs) # Create flow based on flow type flow = Flow.from_type(flow_type=flow_type, node_id=self.node_id, # Generate unique node ID request=self, @@ -441,65 +402,74 @@ def create_flow(self, flow_type, **kwargs): self.nodes[flow.node_id] = flow # Add flow to node dictionary return flow # Return created flow - # > def successors(self, node): """ Returns the next Task or Flow to be executed after node. """ return self.dag.successors(node) - # estimate_kv_cache_size - # def estimate_kv_cache_size(self, num_tokens=None, model=None): def estimate_kv_cache_size(self, num_tokens=None, replica=None): """ - 返回生成num_tokens后的KV缓存大小。 - 需要请求的根节点分配到某个实例上。 - Returns the KV-cache size after generating num_tokens - Requires the Request root node to be allocated on an Instance. + Calculate KV Cache size for the given number of tokens (unit: Bytes). 
+ 计算指定token数量的KV Cache大小 (单位: Bytes) + + KV Cache formula / 公式: + kv_cache_size = 2 (K+V) * num_tokens * num_kv_heads * head_dim * num_layers * bytes_per_element + + Args: + num_tokens: Token count (prefill_tokens + decode_tokens) + replica: Replica instance with model config + + Returns: + int: KV Cache size (Bytes) """ - # if num_tokens is None: # If num_tokens is not specified - # num_tokens = self.generated_tokens # Use the number of generated tokens - # if model is None: # If model is not specified - # # model = self.root_node.instance.model # Use root node's model - # model = self.root_node.replica.model # Use root node's model - - # return 2 * self.batch_size * num_tokens * model.architecture.hidden_size \ - # * model.architecture.num_layers * model.size.dtype_size # Calculate KV cache size - # return 2 * self.batch_size * num_tokens * replica.mlp_hidden_dim \ - # * replica.num_layers * replica.size.dtype_size # Calculate KV cache size - # TODO :p2p > self.batch_size and replica.size.dtype_size from vidur - # Point-to-point communication padding; Global parameters; Comm size/bandwidth; - # TODO Another version of ns3; Support writing a stream in config; For later + # ===== 1. Determine bytes per element (by data type) ===== + # ===== 1. 
确定每个元素的字节数 (根据数据类型) ===== + dtype_to_bytes = { + 'float16': 2, 'bfloat16': 2, + 'float32': 4, 'float64': 8, + 'fp8': 1, 'int8': 1, + 'int16': 2, 'int32': 4, 'int64': 8 + } + bytes_per_element = dtype_to_bytes.get(replica.pd_p2p_comm_dtype, 2) # Default 2 bytes / 默认2字节 - if replica.pd_p2p_comm_dtype == 'float16': - pd_p2p_bytes_per_token = 2 - elif replica.pd_p2p_comm_dtype == 'float32': - pd_p2p_bytes_per_token = 4 - elif replica.pd_p2p_comm_dtype == 'float64': - pd_p2p_bytes_per_token = 8 - elif replica.pd_p2p_comm_dtype == 'bfloat16': - pd_p2p_bytes_per_token = 2 - elif replica.pd_p2p_comm_dtype == 'int8': - pd_p2p_bytes_per_token = 1 - elif replica.pd_p2p_comm_dtype == 'int16': - pd_p2p_bytes_per_token = 2 - elif replica.pd_p2p_comm_dtype == 'int32': - pd_p2p_bytes_per_token = 4 - elif replica.pd_p2p_comm_dtype == 'int64': - pd_p2p_bytes_per_token = 8 - - self.pd_p2p_bytes_per_token = pd_p2p_bytes_per_token + # Save to instance for reuse elsewhere + # 保存到实例属性供其他地方使用 + self.pd_p2p_bytes_per_token = bytes_per_element self.pd_p2p_comm_dtype = replica.pd_p2p_comm_dtype - assert self.pd_p2p_bytes_per_token is not None and self.pd_p2p_comm_dtype is not None, "> Debug: PD P2P dtype is not set" + # ===== 2. Get KV Cache related dimensions ===== + # ===== 2. 获取KV Cache相关维度 ===== + # Use correct KV cache dims: num_kv_heads * attention_head_dim + # 使用正确的KV cache维度: num_kv_heads * attention_head_dim + # (NOT mlp_hidden_dim, which is MLP's dimension) + # (而不是mlp_hidden_dim, 那是MLP的维度) + num_kv_heads = replica.num_kv_heads + head_dim = replica.attention_head_dim # embedding_dim // num_q_heads + num_layers = replica.num_layers - # TODO : >: double check this - return 2 * num_tokens * replica.mlp_hidden_dim \ - * replica.num_layers * pd_p2p_bytes_per_token # Calculate KV cache size - + # ===== 3. Calculate KV Cache size ===== + # ===== 3. 
计算KV Cache大小 ===== + # Formula: 2(K+V) * num_tokens * num_kv_heads * head_dim * num_layers * bytes_per_element + # 公式同上 + kv_cache_size = ( + 2 # K和V两个缓存 + * num_tokens # token数量 + * num_kv_heads # KV heads数量 + * head_dim # 每个head的维度 + * num_layers # 层数 + * bytes_per_element # 每个元素的字节数 + ) + + # ===== 4. Print debug info (first call only) ===== + # ===== 4. 打印调试信息 (首次调用时) ===== + if not hasattr(self, '_kv_cache_debug_printed'): + logger.debug(f"[KV Cache] params: num_tokens={num_tokens}, num_kv_heads={num_kv_heads}, " + f"head_dim={head_dim}, num_layers={num_layers}, bytes={bytes_per_element}") + logger.debug(f"[KV Cache] result: {kv_cache_size} bytes = {kv_cache_size/(1024**3):.4f} GB") + self._kv_cache_debug_printed = True - # return 2 * num_tokens * replica.mlp_hidden_dim \ - # * replica.num_layers # Calculate KV cache size + return kv_cache_size diff --git a/vidur-alibabacloud/vidur/events/batch_end_event.py b/vidur-alibabacloud/vidur/events/batch_end_event.py index 6d9f7bae..f60ced53 100644 --- a/vidur-alibabacloud/vidur/events/batch_end_event.py +++ b/vidur-alibabacloud/vidur/events/batch_end_event.py @@ -42,8 +42,8 @@ def handle_event( - print(f"> Debug: time={self._time} Generates ReplicaScheduleEvent from event {self._id} {self._event_type}, \ - replica_id={self._replica_id}") + logger.debug(f"time={self._time} Generates ReplicaScheduleEvent from event {self._id} {self._event_type}, " + f"replica_id={self._replica_id}") # replica继续将下一个micro-batch加入pipeline # replica continues to add the next micro-batch to the pipeline @@ -56,7 +56,8 @@ def handle_event( # >: Previous code was native vidur; PD separation is added processing; Without PD separation, it won't enter the following path # Check if Splitwise scheduling policy is used - # TODO 250911 > test if non-PD separation works normally + # TODO(tianhao909): test if non-PD separation works normally + # TODO(tianhao909): 测试非 PD 分离模式是否正常工作 if hasattr(scheduler, '__class__') and scheduler.__class__.__name__ == 
'SplitwiseGlobalScheduler': # 对于批次中的每个请求,检查是否需要转移到D副本 # For each request in the batch, check if it needs to be transferred to D replica @@ -76,15 +77,16 @@ def handle_event( # request.request_type = RequestType.DECODE - # TODO: > 在这里添加P2P传输带宽时延开销 - # TODO: > Add P2P transmission bandwidth delay overhead here + # TODO(tianhao909): add P2P transmission bandwidth delay overhead here + # TODO(tianhao909): 在这里添加 P2P 传输带宽时延开销 # transfer_delay = calculate_p2p_transfer_delay(request) # request.decode_arrived_at += transfer_delay # transfer_delay = 1 # > assumption # transfer_delay = 10 # > assumption # request.pd_p2p_comm_size = request.estimate_kv_cache_size() - assert request.num_processed_tokens == request.num_prefill_tokens + 1 , "> debug" + assert request.num_processed_tokens == request.num_prefill_tokens + 1, \ + "processed tokens must equal prefill tokens + 1 at this point" request.pd_p2p_comm_size = request.estimate_kv_cache_size( request.num_processed_tokens, replica_scheduler.replica) # replica_scheduler.replica @@ -92,12 +94,13 @@ def handle_event( # transfer_delay = request.pd_p2p_comm_size / (request.bandwidth - request.bandwidth_used) # transfer_delay = request.pd_p2p_comm_size / request.bandwidth - # TODO >: request.bandwidth 具体怎么赋值, 怎么传,应该是个topo; 或者考虑竞争? - # TODO >: How exactly is request.bandwidth assigned and passed, should be a topology; Or consider contention? 
+ # TODO(tianhao909): determine bandwidth from topology with contention modeling + # TODO(tianhao909): bandwidth 应该从 topo 获取,并考虑竞争 # request.pd_p2p_comm_bandwidth = 400*1024*1024*1024 request.pd_p2p_comm_bandwidth = replica_scheduler.replica.pd_p2p_comm_bandwidth*1024*1024*1024/8 - assert request.pd_p2p_comm_size < float('inf') and request.pd_p2p_comm_size > 0 and request.pd_p2p_comm_bandwidth > 0 , "> debug" + assert request.pd_p2p_comm_size < float('inf') and request.pd_p2p_comm_size > 0 and request.pd_p2p_comm_bandwidth > 0, \ + "P2P communication size and bandwidth must be valid" request.pd_p2p_comm_time = request.pd_p2p_comm_size / request.pd_p2p_comm_bandwidth @@ -108,17 +111,27 @@ def handle_event( # 从P副本中删除请求 # Remove request from P replica - # TODO: > 250911 写两个req p 和 d的token数目都很少; 测试内存判断的逻辑对不对;整体等逻辑对不对 - # TODO: > 250911 Write two requests with few tokens for both p and d; Test if memory judgment logic is correct; Overall logic correctness + # TODO(tianhao909): write small-token test cases for memory logic validation + # TODO(tianhao909): 写两个 req p 和 d 的 token 数目都很少;测试内存判断的逻辑对不对 # > 隐患 replica 清除 req时候, 对应的内存块也要清除 # > risk: When replica clears requests, corresponding memory blocks should also be cleared p_replica_scheduler = replica_scheduler if request in p_replica_scheduler.replica.pending_requests: + # 在移除请求之前,先计算当前的kvcache使用情况 + # print(f"> 在移除请求 {request.id} 之前:") + # p_replica_scheduler.replica.get_remaining_kv_cache_capacity() + + # 移除请求 p_replica_scheduler.replica.pending_requests.remove(request) - # TODO:> 确保对应的存储也清空了 - # TODO: > Ensure corresponding storage is also cleared + # 移除请求后释放相应的显存 + # p_replica_scheduler.replica.release_request_kv_cache_memory(request) + # print(f"> 请求 {request.id} 已从Prefill副本移除并释放显存") + # p_replica_scheduler.replica.get_remaining_kv_cache_capacity() + + # TODO(tianhao909): ensure corresponding storage is also cleared + # TODO(tianhao909): 确保对应的存储也清空了 # 将请求添加到D副本,获取对应的D副本并添加请求 # Add request to D replica, get 
corresponding D replica and add request @@ -130,21 +143,150 @@ def handle_event( # Generate D replica scheduling event events.append(ReplicaScheduleEvent(request.decode_arrived_at, request.decode_replica_id)) - print(f"> Debug: pd d-path time={self._time} Generates ReplicaScheduleEvent from event {self._id} {self._event_type}, \ - decode_replica_id={request.decode_replica_id} len(events)={len(events)}") + logger.debug(f"pd d-path time={self._time} Generates ReplicaScheduleEvent from event {self._id} {self._event_type}, " + f"decode_replica_id={request.decode_replica_id} len(events)={len(events)}") if request._num_processed_tokens >= request._num_prefill_tokens: # print(f"> self.decode_arrived_at={self.decode_arrived_at} self.request_type={self.request_type} self.prefill_completed_at={self.prefill_completed_at} self._is_prefill_complete={self._is_prefill_complete}") - assert request.decode_arrived_at < float("inf") and request.request_type == RequestType.DECODE and request.prefill_completed_at > 0 and request._is_prefill_complete == True, "> debug" + assert request.decode_arrived_at < float("inf") and request.request_type == RequestType.DECODE and request.prefill_completed_at > 0 and request._is_prefill_complete == True, \ + "post-prefill request must have valid decode_arrived_at and be in DECODE state" + # Call memory info logging function (disabled) + # 调用显存信息日志函数(已禁用) + # self._log_memory_info(scheduler) return events + def _log_memory_info(self, scheduler: BaseGlobalScheduler) -> None: + """ + Get and print memory capacity info for prefill and decode replicas. 
+ 获取并打印prefill和decode副本的各种显存容量信息 + """ + # Get all replicas from scheduler + # 获取scheduler中所有的replica + # Use scheduler._replica_schedulers to get all replica IDs + # 使用scheduler的_replica_schedulers属性获取所有副本ID + replica_ids = list(scheduler._replica_schedulers.keys()) + + # Separate prefill and decode replica info + # 分别记录prefill和decode副本的信息 + prefill_replica_info = {} + decode_replica_info = {} + + for replica_id in replica_ids: + replica_scheduler = scheduler.get_replica_scheduler(replica_id) + replica = replica_scheduler.replica + + # Get TP and PP parameters / 获取TP和PP参数 + tensor_parallel_size = replica._replica_config.tensor_parallel_size + pipeline_parallel_size = replica._replica_config.num_pipeline_stages + + # Create param_counter from replica_config + # 从replica_config创建param_counter + param_counter = replica._replica_config._param_counter if hasattr(replica._replica_config, '_param_counter') else None + if param_counter is None: + # If replica has no _param_counter, create from replica_config + # 如果replica本身没有_param_counter,尝试从replica_config创建 + from vidur.utils.param_counter import ParamCounter + param_counter = ParamCounter(replica._replica_config) + + # Get model params memory usage / 获取模型参数占用的显存 + total_params = param_counter.get_num_parameters_per_device() + # Convert bytes to GB / 将bytes转换为GB + total_params_gb = total_params / (1024**3) + + # Create memory_planner from replica_config and replica + # 从replica_config和replica创建memory_planner + from vidur.scheduler.utils.memory_planner import MemoryPlanner + memory_planner = MemoryPlanner(replica._replica_config, replica) + + # Get reserved KV cache memory / 获取kvcache预留的显存 + max_batch_size = memory_planner.get_max_batch_size() + kv_cache_per_request = memory_planner._get_kv_cache_memory_per_device_per_request() + memory_for_kv_cache = kv_cache_per_request * max_batch_size + + # Convert bytes to GB / 将bytes转换为GB + memory_for_kv_cache_gb = memory_for_kv_cache / (1024**3) + + # Get actual running requests' 
KV cache memory + # Note: Replica has no running_requests attr, so we only count pending_requests + # 获取实际运行的request的kvcache显存容量 + # 注意:Replica对象没有running_requests属性,只统计pending_requests + pending_requests_count = len(replica.pending_requests) + actual_kv_cache_memory = kv_cache_per_request * pending_requests_count + actual_kv_cache_memory_gb = actual_kv_cache_memory / (1024**3) + + # Compute whole-replica values (per-GPU * TP * PP) + # 计算整个replica的值(单GPU值 × TP × PP) + total_memory_replica_gb = replica.total_memory_gb * tensor_parallel_size * pipeline_parallel_size + params_memory_replica_gb = total_params_gb * tensor_parallel_size * pipeline_parallel_size + reserved_kv_cache_memory_replica_gb = memory_for_kv_cache_gb * tensor_parallel_size * pipeline_parallel_size + actual_running_kv_cache_memory_replica_gb = actual_kv_cache_memory_gb * tensor_parallel_size * pipeline_parallel_size + + # Store info / 存储信息 + replica_info = { + 'total_memory_gb': replica.total_memory_gb, + 'total_memory_replica_gb': total_memory_replica_gb, + 'params_memory_gb': total_params_gb, + 'params_memory_replica_gb': params_memory_replica_gb, + 'reserved_kv_cache_memory_gb': memory_for_kv_cache_gb, + 'reserved_kv_cache_memory_replica_gb': reserved_kv_cache_memory_replica_gb, + 'actual_running_kv_cache_memory_gb': actual_kv_cache_memory_gb, + 'actual_running_kv_cache_memory_replica_gb': actual_running_kv_cache_memory_replica_gb, + 'active_requests_count': pending_requests_count, + 'max_batch_size': max_batch_size, + 'tp': tensor_parallel_size, + 'pp': pipeline_parallel_size + } + + if replica.replica_type == ReplicaType.PREFILL: + prefill_replica_info[replica_id] = replica_info + elif replica.replica_type == ReplicaType.DECODE: + decode_replica_info[replica_id] = replica_info + + # 打印信息 | Print memory info + logger.info("=" * 100) + logger.info("Memory Usage Statistics (GB) (显存使用情况统计):") + logger.info("-" * 100) + + if prefill_replica_info: + logger.info("Prefill Replica Memory Info 
(Prefill副本显存信息):") + for pid, info in prefill_replica_info.items(): + logger.info(f" Replica ID {pid} (TP={info['tp']}, PP={info['pp']}):") + logger.info(f" Per-GPU total mem (单GPU总显存容量): {info['total_memory_gb']:.2f} GB ({info['total_memory_gb']*1024:.2f} MB)") + logger.info(f" Replica total mem (整个Replica总显存容量): {info['total_memory_replica_gb']:.2f} GB ({info['total_memory_replica_gb']*1024:.2f} MB)") + logger.info(f" Per-GPU model params (单GPU模型参数占用显存): {info['params_memory_gb']:.2f} GB ({info['params_memory_gb']*1024:.2f} MB)") + logger.info(f" Replica model params (整个Replica模型参数占用显存): {info['params_memory_replica_gb']:.2f} GB ({info['params_memory_replica_gb']*1024:.2f} MB)") + logger.info(f" Per-GPU reserved KV cache (单GPU预留kvcache显存): {info['reserved_kv_cache_memory_gb']:.2f} GB ({info['reserved_kv_cache_memory_gb']*1024:.2f} MB)") + logger.info(f" Replica reserved KV cache (整个Replica预留kvcache显存): {info['reserved_kv_cache_memory_replica_gb']:.2f} GB ({info['reserved_kv_cache_memory_replica_gb']*1024:.2f} MB)") + logger.info(f" Per-GPU actual KV cache (单GPU实际kvcache显存): {info['actual_running_kv_cache_memory_gb']:.2f} GB ({info['actual_running_kv_cache_memory_gb']*1024:.2f} MB)") + logger.info(f" Replica actual KV cache (整个Replica实际kvcache显存): {info['actual_running_kv_cache_memory_replica_gb']:.2f} GB ({info['actual_running_kv_cache_memory_replica_gb']*1024:.2f} MB)") + logger.info(f" Active requests (当前活跃请求数): {info['active_requests_count']}") + logger.info(f" Max batch size (最大批处理大小): {info['max_batch_size']}") + logger.info("-" * 100) + + if decode_replica_info: + logger.info("Decode Replica Memory Info (Decode副本显存信息):") + for did, info in decode_replica_info.items(): + logger.info(f" Replica ID {did} (TP={info['tp']}, PP={info['pp']}):") + logger.info(f" Per-GPU total mem (单GPU总显存容量): {info['total_memory_gb']:.2f} GB ({info['total_memory_gb']*1024:.2f} MB)") + logger.info(f" Replica total mem (整个Replica总显存容量): {info['total_memory_replica_gb']:.2f} GB 
({info['total_memory_replica_gb']*1024:.2f} MB)") + logger.info(f" Per-GPU model params (单GPU模型参数占用显存): {info['params_memory_gb']:.2f} GB ({info['params_memory_gb']*1024:.2f} MB)") + logger.info(f" Replica model params (整个Replica模型参数占用显存): {info['params_memory_replica_gb']:.2f} GB ({info['params_memory_replica_gb']*1024:.2f} MB)") + logger.info(f" Per-GPU reserved KV cache (单GPU预留kvcache显存): {info['reserved_kv_cache_memory_gb']:.2f} GB ({info['reserved_kv_cache_memory_gb']*1024:.2f} MB)") + logger.info(f" Replica reserved KV cache (整个Replica预留kvcache显存): {info['reserved_kv_cache_memory_replica_gb']:.2f} GB ({info['reserved_kv_cache_memory_replica_gb']*1024:.2f} MB)") + logger.info(f" Per-GPU actual KV cache (单GPU实际kvcache显存): {info['actual_running_kv_cache_memory_gb']:.2f} GB ({info['actual_running_kv_cache_memory_gb']*1024:.2f} MB)") + logger.info(f" Replica actual KV cache (整个Replica实际kvcache显存): {info['actual_running_kv_cache_memory_replica_gb']:.2f} GB ({info['actual_running_kv_cache_memory_replica_gb']*1024:.2f} MB)") + logger.info(f" Active requests (当前活跃请求数): {info['active_requests_count']}") + logger.info(f" Max batch size (最大批处理大小): {info['max_batch_size']}") + logger.info("-" * 100) + logger.info("=" * 100) + - def to_dict(self): + def to_dict(self) -> dict: return { "time": self.time, "event_type": self.event_type, "batch_id": self._batch.id, - } + } \ No newline at end of file diff --git a/vidur-alibabacloud/vidur/events/batch_stage_arrival_event.py b/vidur-alibabacloud/vidur/events/batch_stage_arrival_event.py index bb27b615..e583678f 100644 --- a/vidur-alibabacloud/vidur/events/batch_stage_arrival_event.py +++ b/vidur-alibabacloud/vidur/events/batch_stage_arrival_event.py @@ -10,6 +10,7 @@ logger = init_logger(__name__) +# A micro-batch arrives at a PP stage # 一个micro-batch到达某个PP stage class BatchStageArrivalEvent(BaseEvent): def __init__(self, time: float, replica_id: int, stage_id: int, batch: Batch): diff --git 
a/vidur-alibabacloud/vidur/events/batch_stage_end_event.py b/vidur-alibabacloud/vidur/events/batch_stage_end_event.py index 647046c3..ed3954a4 100644 --- a/vidur-alibabacloud/vidur/events/batch_stage_end_event.py +++ b/vidur-alibabacloud/vidur/events/batch_stage_end_event.py @@ -53,16 +53,13 @@ def handle_event( next_events = [ # 当前stage调度下一个micro-batch - # TODO: 这里有点怪,BatchStageEndEvent会引发当前stage的调度 - # BatchStageArrivalEvent也会引发当前stage的调度 - # 虽然多次调度并不会引发问题,但是有很多调度是多余的(因为stage_scheduler.is_busy = True - # 或者stage_scheduler.queue为空) - - # Schedule the next micro-batch on the current stage - # TODO: This seems odd, BatchStageEndEvent triggers scheduling of the current stage - # BatchStageArrivalEvent also triggers scheduling of the current stage - # Although multiple scheduling doesn't cause issues, many schedules are redundant + # TODO(tianhao909): odd behavior - BatchStageEndEvent triggers current stage scheduling + # BatchStageArrivalEvent also triggers current stage scheduling + # Although multiple scheduling doesn't cause issues, many schedules are redundant # (because stage_scheduler.is_busy = True or stage_scheduler.queue is empty) + # TODO(tianhao909): 这里有点怪,BatchStageEndEvent 会触发当前 stage 的调度 + # BatchStageArrivalEvent 也会触发当前 stage 的调度 + # 虽然多次调度不会引发问题,但很多调度是冗余的 ReplicaStageScheduleEvent( self.time, self._replica_id, diff --git a/vidur-alibabacloud/vidur/events/replica_schedule_event.py b/vidur-alibabacloud/vidur/events/replica_schedule_event.py index 208e7687..bd0ed514 100644 --- a/vidur-alibabacloud/vidur/events/replica_schedule_event.py +++ b/vidur-alibabacloud/vidur/events/replica_schedule_event.py @@ -27,9 +27,9 @@ def handle_event( replica_scheduler = scheduler.get_replica_scheduler(self._replica_id) # _batches中至多有PP-stages个batch - # TODO: 这里有一点奇怪,他这样的话就是每次issue PP-stages个batch + # TODO(tianhao909): odd behavior - issues PP-stages batches each time # _batches contains at most PP-stages batches - # TODO: This is a bit strange, as it issues PP-stages 
batches each time + # TODO(tianhao909): 这里有点奇怪,每次发射 PP-stages 个 batch self._batches = replica_scheduler.on_schedule() if not self._batches: diff --git a/vidur-alibabacloud/vidur/events/replica_stage_schedule_event.py b/vidur-alibabacloud/vidur/events/replica_stage_schedule_event.py index d6a451e9..724eb78d 100644 --- a/vidur-alibabacloud/vidur/events/replica_stage_schedule_event.py +++ b/vidur-alibabacloud/vidur/events/replica_stage_schedule_event.py @@ -47,10 +47,11 @@ def handle_event( self._is_last_stage = stage_scheduler.is_last_stage - print(f"> Debug: time={self._time} Event {self._id} of type {self._event_type} \ - Generates 1 BatchStageEndEvent replica_id={self._replica_id} stage_id={self._stage_id} \ - batch_stage={self._batch_stage}") - assert self._batch_stage.execution_time >= 0, f"> debug self._batch_stage.execution_time={self._batch_stage.execution_time}" + logger.debug(f"time={self._time} Event {self._id} of type {self._event_type} " + f"Generates 1 BatchStageEndEvent replica_id={self._replica_id} stage_id={self._stage_id} " + f"batch_stage={self._batch_stage}") + assert self._batch_stage.execution_time >= 0, \ + f"batch_stage execution_time must be non-negative, got {self._batch_stage.execution_time}" return [ BatchStageEndEvent( diff --git a/vidur-alibabacloud/vidur/execution_time_predictor/base_execution_time_predictor.py b/vidur-alibabacloud/vidur/execution_time_predictor/base_execution_time_predictor.py index d937471d..27bf45f4 100644 --- a/vidur-alibabacloud/vidur/execution_time_predictor/base_execution_time_predictor.py +++ b/vidur-alibabacloud/vidur/execution_time_predictor/base_execution_time_predictor.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod from vidur.execution_time_predictor.communication_time_predictor import TPTimePredictor +from vidur.logger import init_logger from vidur.config import ( BaseExecutionTimePredictorConfig, @@ -11,6 +12,8 @@ ) from vidur.entities import Batch, ExecutionTime +logger = init_logger(__name__) + # 
返回单个micro-batch在单个TP shard,单个PP stage上的执行时间 # Returns execution time for a single micro-batch on a single TP shard and a single PP stage @@ -61,60 +64,103 @@ def get_execution_time(self, batch: Batch, pipeline_stage: int) -> ExecutionTime if self._config.backend == "simai_simulation": tensor_parallel_communication_time = self._tp_time_predictor.get_execution_time(batch) - # TODO: chentong fix it - # fy:有可能跑出来结果是-1 - # fy: Result may be -1 - assert tensor_parallel_communication_time >= 0, "> Debug: tensor_parallel_communication_time must be greater than 0" + # TODO(chentong): fix potential -1 return value + # Result may be -1 in some cases + # 有可能跑出来结果是 -1 + assert tensor_parallel_communication_time >= 0, "tensor_parallel_communication_time must be non-negative" - # >: 如果simai 后端返回-1,则调用vidur的查表方法 - # >: If simai backend returns -1, call vidur's lookup table method + # If simai backend returns -1, fall back to vidur's lookup table method + # 如果 simai 后端返回 -1,则调用 vidur 的查表方法 if tensor_parallel_communication_time == -1: tensor_parallel_communication_time = self._get_tensor_parallel_communication_time(batch) # elif self._config.simai_analytical_enable: elif self._config.backend == "simai_analytical": tensor_parallel_communication_time = self._tp_time_predictor.get_execution_time_by_simai_analytical(batch) - assert tensor_parallel_communication_time >= 0, "> Debug: tensor_parallel_communication_time must be greater than 0" + assert tensor_parallel_communication_time >= 0, "tensor_parallel_communication_time must be non-negative" - # >:如果simai 后端返回-1,则调用vidur的查表方法 - # >: If simai backend returns -1, call vidur's lookup table method + # If simai backend returns -1, fall back to vidur's lookup table method + # 如果 simai 后端返回 -1,则调用 vidur 的查表方法 if tensor_parallel_communication_time == -1: tensor_parallel_communication_time = self._get_tensor_parallel_communication_time(batch) elif self._config.backend == "aicb": - # TODO currently not supported TP communication when using 
aicb + # TODO(tianhao909): add TP communication support for AICB backend + # TODO(tianhao909): AICB 后端暂不支持 TP 通信 tensor_parallel_communication_time = 0 else: - assert self._config.backend == "vidur", "> Debug: self._config.backend can only be simai_simulation, simai_analytical, vidur" + assert self._config.backend == "vidur", "backend must be one of: simai_simulation, simai_analytical, aicb, vidur" tensor_parallel_communication_time = self._get_tensor_parallel_communication_time(batch) if self._config.backend == "aicb": - # > add self - # extract AICB params + # ============================================================ + # [AICB Backend] Build per-batch replica_config copy + # Need to set correct params based on current batch phase (prefill/decode) + # [AICB Backend] 构建 per-batch 的 replica_config 副本 + # 需要根据当前 batch 的 phase (prefill/decode) 设置正确参数 + # ============================================================ import copy replica_config = copy.deepcopy(self._replica_config) - # TODO is this correct? 
+ # Determine current batch phase: prefill or decode + # 判断当前 batch 的 phase: prefill or decode batch_prefill_replica_id = batch.requests[0].prefill_replica_id batch_replica_id = batch.replica_id - # > add - # print(f"> debug self.replica_type ") - - if batch_prefill_replica_id == batch_replica_id: replica_config.phase = "prefill" else: replica_config.phase = "decode" - - tp = self._replica_config.tensor_parallel_size - pp = self._replica_config.num_pipeline_stages - # dp = 1 # TODO get world_size from dp or somehow get replica size - dp = self.simulation_config.cluster_config.num_replicas - ws = tp * pp * dp + + # ============================================================ + # [PD-Aware] Set correct TP/PP/WS/EP per phase + # PD separation: prefill/decode have independent world_size and EP + # - prefill: ws = p_tp * p_pp * num_p, ep = ws + # - decode: ws = d_tp * d_pp * num_d, ep = ws + # Non-PD: ws = tp * pp * total_dp, ep = ws + # + # [PD-Aware] 按 phase 设置正确的 TP/PP/WS/EP + # PD 分离时: prefill/decode 有独立的 world_size 和 EP + # 非 PD 场景: ws = tp * pp * total_dp, ep = ws + # ============================================================ + orig_tp = self._replica_config.tensor_parallel_size + orig_pp = self._replica_config.num_pipeline_stages + total_dp = self.simulation_config.cluster_config.num_replicas + + if replica_config.phase == "prefill" and hasattr(self._replica_config, 'prefill_world_size'): + # PD separation: use prefill cluster params / PD 分离: 使用 prefill 集群的参数 + tp = getattr(self._replica_config, '_prefill_tp', orig_tp) + pp = getattr(self._replica_config, '_prefill_pp', orig_pp) + ws = self._replica_config.prefill_world_size + ep = getattr(self._replica_config, 'prefill_ep', ws) + elif replica_config.phase == "decode" and hasattr(self._replica_config, 'decode_world_size'): + # PD separation: use decode cluster params / PD 分离: 使用 decode 集群的参数 + tp = getattr(self._replica_config, '_decode_tp', orig_tp) + pp = getattr(self._replica_config, '_decode_pp', orig_pp) + 
ws = self._replica_config.decode_world_size + ep = getattr(self._replica_config, 'decode_ep', ws) + else: + # Non-PD: EP = ws = tp * pp * dp / 非 PD 场景 + tp = orig_tp + pp = orig_pp + ws = tp * pp * total_dp + ep = ws + + # Write per-phase params to copied replica_config + # 将 per-phase 参数写入 copy 后的 replica_config replica_config.world_size = ws - + replica_config.expert_model_parallel_size = ep + replica_config.tensor_parallel_size = tp + replica_config.num_pipeline_stages = pp + + # Print current batch AICB params for debugging + # Note: non-PD mode also has prefill_world_size (unified interface), use pd_node_ratio to determine + # 打印当前 batch 的 AICB 参数, 方便调试确认 + # 注意: 非PD模式也有 prefill_world_size (统一接口), 用 pd_node_ratio 判断 + pd_mode = "PD-separated" if self._replica_config.pd_node_ratio < 1 else "MIXED(non-PD)" + logger.debug(f"[AICB Params] phase={replica_config.phase}, tp={tp}, pp={pp}, " + f"ws={ws}, ep={ep}, total_dp={total_dp}, mode={pd_mode}") if replica_config.phase == "prefill": bs = 1 seq = 0 @@ -122,13 +168,29 @@ def get_execution_time(self, batch: Batch, pipeline_stage: int) -> ExecutionTime if request._is_prefill_complete: continue seq += num_tokens_to_process + # Prefill phase does not need first-last interpolation + # prefill阶段不需要首尾插值 + replica_config.decode_last_seq = None elif replica_config.phase == "decode": bs = 0 seq = 0 + decode_last_seq = 0 # Last decode iteration's seq for first-last interpolation / 最后一轮decode的seq值,用于首尾插值预加载 for request, num_tokens_to_process in zip(batch.requests, batch.num_tokens): if request._is_prefill_complete: bs += 1 + # Current iteration seq = prefill_tokens + processed_decode_tokens - 1 + # 当前迭代的seq = prefill_tokens + processed_decode_tokens - 1 seq += request.num_processed_prefill_tokens + request.num_processed_decode_tokens - 1 + # Last iteration seq = prefill_tokens + (decode_tokens - 1) - 1 + # Because at last iteration processed_decode_tokens = decode_tokens - 1 + # 最后一轮的seq = prefill_tokens + (decode_tokens - 1) 
- 1 + # 因为最后一轮时 processed_decode_tokens = decode_tokens - 1 + decode_last_seq += request.num_processed_prefill_tokens + (request.num_decode_tokens - 1) - 1 + + # [First-Last Interpolation] Save last decode iteration seq + # [首尾插值] 保存最后一轮decode的seq值 + replica_config.decode_last_seq = decode_last_seq + logger.debug(f"[AICB first-last interpolation] decode current seq={seq}, last iter seq={decode_last_seq}") replica_config.batch_size = bs replica_config.seq_len = seq @@ -160,6 +222,7 @@ def get_execution_time(self, batch: Batch, pipeline_stage: int) -> ExecutionTime self.replica_scheduler_config # self._model_config ) + else: return ExecutionTime( self._num_layers_per_pipeline_stage, diff --git a/vidur-alibabacloud/vidur/execution_time_predictor/communication_time_predictor.py b/vidur-alibabacloud/vidur/execution_time_predictor/communication_time_predictor.py index 4da6d3ad..160148bd 100644 --- a/vidur-alibabacloud/vidur/execution_time_predictor/communication_time_predictor.py +++ b/vidur-alibabacloud/vidur/execution_time_predictor/communication_time_predictor.py @@ -25,8 +25,8 @@ def __init__(self, self.predictor_config = predictor_config self.replica_config = replica_config # TODO ct: change to sizeof(tensor.dtype) - # fy:得做; 动态调整dtype; 从config里面获取 - # fy: need to do; dynamically adjust dtype; get from config + # TODO: dynamically adjust dtype from config + # 待实现: 从 config 动态获取 dtype self.tensor_size = 2 self.workload: SimAIWorkload = SimAIWorkload( tp_size=replica_config.tensor_parallel_size, @@ -79,8 +79,8 @@ def get_execution_time(self, batch: Batch): # Generate hash based on all relevant parameters of WorkItem - # TODO: > 增加layer0 str(1) "ALLREDUCE"等其他变量对于hash的影响,目前是写成固定值的 - # TODO: > Add impact of other variables like layer0 str(1) "ALLREDUCE" to hash, currently hardcoded + # TODO(tianhao909): add layer0 str(1) "ALLREDUCE" etc. 
to hash computation, currently hardcoded + # TODO(tianhao909): 增加 layer0 str(1) "ALLREDUCE" 等变量对 hash 的影响,目前是固定值 work_item_data = ( "layer0" + @@ -173,13 +173,13 @@ def get_execution_time(self, batch: Batch): # > rewrite: add two features to reuse same workloads and results of same commands def get_execution_time_by_simai_analytical(self, batch: Batch): """ + Predict communication time using SimAI analytical tool. + Args: batch: Batch object containing batch information to process. + Returns: float: Predicted execution time (ms), returns -1 on error. + 使用SimAI分析工具预测通信时间的方法 Args: batch: Batch对象,包含需要处理的批次信息 Returns: float: 预测的执行时间(毫秒),如果出错则返回-1 - - The method of predicting communication time using the SimAI analysis tool - Args: batch: Batch object, containing the batch information to be processed. - Returns: float: Predicted execution time (milliseconds), returns -1 if an error occurs. """ self.workload.flush() num_tokens_in_batch = batch._total_num_tokens_rounded @@ -198,12 +198,12 @@ def get_execution_time_by_simai_analytical(self, batch: Batch): * self.replica_config.tensor_parallel_size**1.25) - # 为workload和命令生成唯一标识符 - # 基于WorkItem的所有相关参数生成哈希值 - # TODO: > 增加layer0 str(1) "ALLREDUCE"等其他变量对于hash的影响,目前是写成固定值的 + # TODO(tianhao909): add layer0 str(1) "ALLREDUCE" etc. 
to hash computation, currently hardcoded + # 为 workload 和命令生成唯一标识符 # Generate unique identifier for workload and command + # 基于 WorkItem 的所有相关参数生成哈希值 # Generate hash based on all relevant parameters of WorkItem - # TODO: > Add impact of other variables like layer0 str(1) "ALLREDUCE" to hash, currently hardcoded + # TODO(tianhao909): 增加 layer0 str(1) "ALLREDUCE" 等变量对 hash 的影响,目前是固定值 work_item_data = ( "layer0" + @@ -214,6 +214,7 @@ def get_execution_time_by_simai_analytical(self, batch: Batch): str(self.tensor_size) ) + # Generate MD5 hash as workload identifier # 使用MD5哈希算法生成工作负载标识符 workload_identifier = hashlib.md5(work_item_data.encode()).hexdigest() @@ -301,9 +302,8 @@ def get_execution_time_by_simai_analytical(self, batch: Batch): # Get latency data from index 5 position (microseconds), convert to milliseconds latency = float(rows[-1][5]) * 1e-3 - # TODO: > 量太小有可能tp通信量是零, 比如 通信量为65535的时候, 就是0 - # 通信量为3276800的时候, laytency=0.015ms - # TODO: > Amount may be too small for tp communication to be zero, e.g. when communication amount is 65535, it's 0 + # TODO(tianhao909): handle near-zero TP communication amounts (e.g. 
65535 bytes -> 0 latency) + # TODO(tianhao909): 通信量太小时 TP 通信可能为 0,如 65535 bytes 时 latency=0 # When communication amount is 3276800, latency=0.015ms # assert all_reduce_bytes>0 and latency > 0, f"> Debug: all_reduce_bytes={all_reduce_bytes} latency={latency} need to be >=0" diff --git a/vidur-alibabacloud/vidur/execution_time_predictor/sklearn_execution_time_predictor.py b/vidur-alibabacloud/vidur/execution_time_predictor/sklearn_execution_time_predictor.py index b46a4f03..a3e505af 100644 --- a/vidur-alibabacloud/vidur/execution_time_predictor/sklearn_execution_time_predictor.py +++ b/vidur-alibabacloud/vidur/execution_time_predictor/sklearn_execution_time_predictor.py @@ -681,9 +681,10 @@ def _train_models(self) -> Dict[str, BaseEstimator]: return models def _predict_for_compute_models_by_aicb(self) -> Dict[str, Any]: - # 存储预测结果 + # Store prediction results / 存储预测结果 predictions = {} + # Define compute layer model names for prediction # 定义需要预测的计算层模型名 model_names = [ "attn_pre_proj", @@ -691,84 +692,100 @@ def _predict_for_compute_models_by_aicb(self) -> Dict[str, Any]: "mlp_up_proj", "mlp_down_proj", "mlp_act", - # "attn_rope", # 当前未启用RoPE层建模 + # "attn_rope", # RoPE layer modeling not enabled / 当前未启用RoPE层建模 "attn_kv_cache_save", "input_layernorm", "post_attention_layernorm", "add", ] + # Add send/recv comm model if pipeline parallelism exists # 若存在流水线并行,则加入send/recv通信模型 if self._replica_config.num_pipeline_stages > 1: model_names.append("send_recv") + # Add all_reduce comm model if tensor parallelism exists # 若存在张量并行,则加入all_reduce通信模型 if self._replica_config.tensor_parallel_size > 1: model_names.append("all_reduce") + # Generate range from 1 to max tokens for batch prediction # 生成从1到最大token数的范围,用于批量预测 num_token_range = np.arange(1, self._max_tokens + 1) - # 构造输入DataFrame + # Construct input DataFrame / 构造输入DataFrame X = pd.DataFrame({"num_tokens": num_token_range}) + # Predict and cache results for each compute model # 对每个计算模型进行预测,并将结果缓存 for model_name in 
model_names: + # Create simulated predictions with linear growth based on num_tokens + # Set different base time and growth rate per model type # 创建模拟的预测值,基于num_tokens线性增长 # 根据模型类型设置不同的基础时间和增长率 if model_name in ["attn_pre_proj", "attn_post_proj"]: + # Attention projection: base 0.001ms, +0.0001ms per token # 注意力投影层:基础时间0.001ms,每token增加0.0001ms predictions[model_name] = { (num_tokens,): 0.001 + 0.0001 * num_tokens for num_tokens in num_token_range } elif model_name in ["mlp_up_proj", "mlp_down_proj"]: + # MLP layer: base 0.0015ms, +0.00015ms per token # MLP层:基础时间0.0015ms,每token增加0.00015ms predictions[model_name] = { (num_tokens,): 0.0015 + 0.00015 * num_tokens for num_tokens in num_token_range } elif model_name == "mlp_act": + # MLP activation: base 0.0005ms, +0.00005ms per token # MLP激活层:基础时间0.0005ms,每token增加0.00005ms predictions[model_name] = { (num_tokens,): 0.0005 + 0.00005 * num_tokens for num_tokens in num_token_range } elif model_name == "attn_kv_cache_save": + # KV cache save: base 0.0002ms, +0.00002ms per token # KV缓存保存:基础时间0.0002ms,每token增加0.00002ms predictions[model_name] = { (num_tokens,): 0.0002 + 0.00002 * num_tokens for num_tokens in num_token_range } elif model_name in ["input_layernorm", "post_attention_layernorm"]: + # LayerNorm: base 0.0003ms, +0.00003ms per token # LayerNorm层:基础时间0.0003ms,每token增加0.00003ms predictions[model_name] = { (num_tokens,): 0.0003 + 0.00003 * num_tokens for num_tokens in num_token_range } elif model_name == "add": + # Add op: base 0.0001ms, +0.00001ms per token # Add操作:基础时间0.0001ms,每token增加0.00001ms predictions[model_name] = { (num_tokens,): 0.0001 + 0.00001 * num_tokens for num_tokens in num_token_range } elif model_name == "send_recv": + # Send/Recv comm: base 0.01ms, +0.001ms per token # Send/Recv通信:基础时间0.01ms,每token增加0.001ms predictions[model_name] = { (num_tokens,): 0.01 + 0.001 * num_tokens for num_tokens in num_token_range } elif model_name == "all_reduce": + # All-reduce comm: base 0.02ms, +0.002ms per token # 
All-reduce通信:基础时间0.02ms,每token增加0.002ms predictions[model_name] = { (num_tokens,): 0.02 + 0.002 * num_tokens for num_tokens in num_token_range } + # Predict and cache results for each compute model # 对每个计算模型进行预测,并将结果缓存 # for model_name in model_names: # model = self._models[model_name] # predictions[model_name] = self._get_model_prediction(model_name, model, X) + # Return all compute layer predictions (for later lookup) # 返回所有计算相关层的预测结果(用于后续查表) return predictions @@ -809,14 +826,14 @@ def _predict_for_compute_models(self) -> Dict[str, Any]: def _predict_for_cpu_overhead_models_by_aicb(self) -> Dict[str, Any]: - # 若跳过CPU开销建模,则直接返回空 + # Skip if CPU overhead modeling is disabled / 若跳过CPU开销建模,则直接返回空 if self._config.skip_cpu_overhead_modeling: return {} - # 存储CPU开销预测结果 + # Store CPU overhead predictions / 存储CPU开销预测结果 predictions = {} - # CPU相关的开销模型名称 + # CPU-related overhead model names / CPU相关的开销模型名称 model_names = [ "schedule", "sampler_e2e", @@ -825,52 +842,58 @@ def _predict_for_cpu_overhead_models_by_aicb(self) -> Dict[str, Any]: "ray_comm_time", ] + # Batch size range: 1 to max prediction batch size # 批处理大小范围:1 到 最大预测批大小 batch_size_range = np.arange(1, self._config.prediction_max_batch_size + 1) - # 构造输入数据 + # Construct input data / 构造输入数据 X = pd.DataFrame({"batch_size": batch_size_range}) - # 对每个CPU开销模型进行预测 + # Predict for each CPU overhead model / 对每个CPU开销模型进行预测 # for model_name in model_names: # model = self._models[model_name] # predictions[model_name] = self._get_model_prediction(model_name, model, X) - # 对每个CPU开销模型进行预测 + # Predict for each CPU overhead model / 对每个CPU开销模型进行预测 for model_name in model_names: - # 为每个模型生成模拟值 + # Generate simulated values for each model / 为每个模型生成模拟值 if model_name == "schedule": + # Scheduling overhead: base 0.5ms, +0.05ms per batch # 调度开销:基础时间0.5ms,每增加一个batch增加0.05ms predictions[model_name] = { (batch_size,): 0.5 + 0.05 * batch_size for batch_size in batch_size_range } elif model_name == "sampler_e2e": + # Sampler e2e overhead: 
base 1.0ms, +0.1ms per batch # 采样端到端开销:基础时间1.0ms,每增加一个batch增加0.1ms predictions[model_name] = { (batch_size,): 1.0 + 0.1 * batch_size for batch_size in batch_size_range } elif model_name == "prepare_inputs_e2e": + # Prepare inputs e2e overhead: base 0.8ms, +0.08ms per batch # 准备输入端到端开销:基础时间0.8ms,每增加一个batch增加0.08ms predictions[model_name] = { (batch_size,): 0.8 + 0.08 * batch_size for batch_size in batch_size_range } elif model_name == "process_model_outputs": + # Process model outputs overhead: base 0.3ms, +0.03ms per batch # 处理模型输出开销:基础时间0.3ms,每增加一个batch增加0.03ms predictions[model_name] = { (batch_size,): 0.3 + 0.03 * batch_size for batch_size in batch_size_range } elif model_name == "ray_comm_time": + # Ray comm time: base 0.2ms, +0.02ms per batch # Ray通信时间:基础时间0.2ms,每增加一个batch增加0.02ms predictions[model_name] = { (batch_size,): 0.2 + 0.02 * batch_size for batch_size in batch_size_range } - # 返回CPU开销部分的预测结果 + # Return CPU overhead predictions / 返回CPU开销部分的预测结果 return predictions def _predict_for_cpu_overhead_models(self) -> Dict[str, Any]: @@ -897,21 +920,23 @@ def _predict_for_cpu_overhead_models(self) -> Dict[str, Any]: return predictions def _predict_for_attention_layer_models_by_aicb(self) -> Dict[str, Any]: - # 存储注意力层预测结果 + # Store attention layer predictions / 存储注意力层预测结果 predictions = {} - # 解码阶段的batch size枚举范围 + # Decode batch size enumeration range / 解码阶段的batch size枚举范围 decode_batch_size_range = np.arange( 1, self._config.prediction_max_batch_size + 1 ) + # KV cache size range, incremented by block granularity # kv缓存大小的枚举范围,按block粒度递增 decode_kv_cache_size_range = np.arange( 0, self._config.prediction_max_tokens_per_request + 1, - self._config.kv_cache_prediction_granularity, # kv cache存储为block,这里就是block_size + self._config.kv_cache_prediction_granularity, # KV cache stored in blocks / kv cache存储为block,这里就是block_size ) - # 解码时prefill chunk size固定为0 + # Decode prefill_chunk_size is fixed at 0 / 解码时prefill chunk size固定为0 decode_prefill_chunk_size_range = [0] + # 
Generate all combinations (Cartesian product) # 生成所有组合(笛卡尔积) decode_batch_size, decode_kv_cache_size, decode_prefill_chunk_size = zip( *product( @@ -921,18 +946,21 @@ def _predict_for_attention_layer_models_by_aicb(self) -> Dict[str, Any]: ) ) + # Prefill only supports batch_size=1 (single request typically) # Prefill阶段仅支持batch_size=1(通常单请求) prefill_batch_size_range = [1] + # Also enumerate KV cache size by block granularity # 同样按block粒度枚举kv缓存大小 prefill_kv_cache_size_range = np.arange( 0, self._config.prediction_max_tokens_per_request + 1, self._config.kv_cache_prediction_granularity, ) - # Prefill chunk大小从1到最大允许值 + # Prefill chunk size from 1 to max / Prefill chunk大小从1到最大允许值 prefill_prefill_chunk_size_range = np.arange( 1, self._config.prediction_max_prefill_chunk_size + 1 ) + # Generate all prefill parameter combinations # 生成所有prefill参数组合 prefill_batch_size, prefill_kv_cache_size, prefill_prefill_chunk_size = zip( *product( @@ -942,6 +970,7 @@ def _predict_for_attention_layer_models_by_aicb(self) -> Dict[str, Any]: ) ) + # Merge decode and prefill combinations into one DataFrame # 合并decode和prefill的所有参数组合成一个DataFrame attention_df = pd.DataFrame( { @@ -952,47 +981,56 @@ def _predict_for_attention_layer_models_by_aicb(self) -> Dict[str, Any]: } ) - # 标记是否为decode阶段(chunk_size == 0) + # Mark decode stage (chunk_size == 0) / 标记是否为decode阶段 attention_df["is_decode"] = attention_df["prefill_chunk_size"] == 0 + # num_tokens = max(prefill_chunk_size, batch_size) # num_tokens取prefill_chunk_size和batch_size的最大值 attention_df["num_tokens"] = attention_df[ ["prefill_chunk_size", "batch_size"] ].max(axis=1) + # Add squared prefill chunk size feature (for model input) # 添加prefill chunk大小的平方项(用于模型输入) attention_df["prefill_chunk_size_squared"] = ( attention_df["prefill_chunk_size"] ** 2 ) - # 分离出prefill和decode的数据子集 + # Split into prefill and decode subsets / 分离出prefill和decode的数据子集 prefill_df = attention_df[~attention_df["is_decode"]] decode_df = 
attention_df[attention_df["is_decode"]] - # 进一步筛选有缓存的prefill数据 + # Filter prefill data with existing cache / 进一步筛选有缓存的prefill数据 chunked_prefill_df = prefill_df[prefill_df["kv_cache_size"] > 0].copy() - # 计算总的prefill token数量 + # Calculate total prefill token count / 计算总的prefill token数量 chunked_prefill_df["total_prefill_tokens"] = ( chunked_prefill_df["kv_cache_size"] + chunked_prefill_df["prefill_chunk_size"] ) + # Predict prefill attention time using simulated values (based on kv_cache and chunk^2) # 使用模拟值预测prefill注意力时间(基于kv缓存和chunk^2) prefill_data = prefill_df[["kv_cache_size", "prefill_chunk_size_squared"]].values predictions["attn_prefill"] = {} for kv_cache_size, prefill_chunk_size_squared in prefill_data: + # Prefill time based on kv_cache size and chunk size squared + # base 0.01ms + kv_cache/100 * 0.001ms + chunk_size_sq/10000 * 0.0001ms # Prefill时间基于kv_cache大小和chunk大小平方计算 # 基础时间0.01ms + kv_cache每增加100增加0.001ms + chunk_size_squared每增加10000增加0.0001ms time = 0.01 + (kv_cache_size * 0.001 / 100) + (prefill_chunk_size_squared * 0.0001 / 10000) predictions["attn_prefill"][(int(kv_cache_size), int(prefill_chunk_size_squared))] = time + # Predict decode attention time using simulated values (based on batch_size and kv_cache) # 使用模拟值预测decode注意力时间(基于batch_size和kv缓存) decode_data = decode_df[["batch_size", "kv_cache_size"]].values predictions["attn_decode"] = {} for batch_size, kv_cache_size in decode_data: + # Decode time based on batch_size and kv_cache size + # base 0.005ms + 0.002ms per batch_size + kv_cache/100 * 0.0005ms # Decode时间基于batch_size和kv_cache大小计算 # 基础时间0.005ms + batch_size每增加1增加0.002ms + kv_cache_size每增加100增加0.0005ms time = 0.005 + (batch_size * 0.002) + (kv_cache_size * 0.0005 / 100) predictions["attn_decode"][(int(batch_size), int(kv_cache_size))] = time + # # Predict prefill attention time using trained model (based on kv_cache and chunk^2) # # 使用训练好的模型预测prefill注意力时间(基于kv缓存和chunk^2) # predictions["attn_prefill"] = self._get_model_prediction( # 
"attn_prefill", @@ -1000,6 +1038,7 @@ def _predict_for_attention_layer_models_by_aicb(self) -> Dict[str, Any]: # prefill_df[["kv_cache_size", "prefill_chunk_size_squared"]], # ) + # # Predict decode attention time using trained model (based on batch_size and kv_cache) # # 使用训练好的模型预测decode注意力时间(基于batch_size和kv缓存) # predictions["attn_decode"] = self._get_model_prediction( # "attn_decode", @@ -1007,7 +1046,7 @@ def _predict_for_attention_layer_models_by_aicb(self) -> Dict[str, Any]: # decode_df[["batch_size", "kv_cache_size"]], # ) - # 返回注意力层所有预测结果 + # Return all attention layer predictions / 返回注意力层所有预测结果 return predictions def _predict_for_attention_layer_models(self) -> Dict[str, Any]: @@ -1235,7 +1274,8 @@ def _get_attention_decode_execution_time(self, batch: Batch) -> float: ) = self._get_batch_decode_attention_params(batch) if decode_batch_size == 0: return 0 - #TODO decode 打印 + # TODO(tianhao909): add decode output logging + # TODO(tianhao909): 添加 decode 输出日志 return self._predictions["attn_decode"][ (decode_batch_size, decode_avg_kv_cache_size) ] * ( diff --git a/vidur-alibabacloud/vidur/metrics/cdf_sketch.py b/vidur-alibabacloud/vidur/metrics/cdf_sketch.py index 50aeebf2..327de59c 100644 --- a/vidur-alibabacloud/vidur/metrics/cdf_sketch.py +++ b/vidur-alibabacloud/vidur/metrics/cdf_sketch.py @@ -5,6 +5,7 @@ from ddsketch.ddsketch import DDSketch from vidur.logger import init_logger +from vidur.metrics.data_series import _safe_write_image # qoder logger = init_logger(__name__) @@ -146,5 +147,6 @@ def plot_cdf(self, path: str, plot_name: str, x_axis_label: str = None) -> None: labels={"x": x_axis_label}, ) fig.update_traces(marker=dict(color="red", size=2)) - fig.write_image(f"{path}/{plot_name}.png") + # fig.write_image(f"{path}/{plot_name}.png") + _safe_write_image(fig, f"{path}/{plot_name}.png") # qoder self._save_df(df, path, plot_name) diff --git a/vidur-alibabacloud/vidur/metrics/data_series.py b/vidur-alibabacloud/vidur/metrics/data_series.py index 
51be848d..3e5648e2 100644 --- a/vidur-alibabacloud/vidur/metrics/data_series.py +++ b/vidur-alibabacloud/vidur/metrics/data_series.py @@ -11,6 +11,34 @@ logger = init_logger(__name__) +# Chrome/Kaleido 是否可用的标记, 首次失败后跳过后续所有 write_image 调用 +# Flag for Chrome/Kaleido availability, skip all subsequent write_image after first failure +_KALEIDO_AVAILABLE = True + + +def _safe_write_image(fig, path: str): + """ + 安全地写入图片, Chrome/Kaleido 不可用时优雅跳过 + Safely write image, gracefully skip when Chrome/Kaleido is unavailable + """ + global _KALEIDO_AVAILABLE + if not _KALEIDO_AVAILABLE: + return + try: + fig.write_image(path) + except RuntimeError as e: + if "Chrome" in str(e) or "Kaleido" in str(e): + _KALEIDO_AVAILABLE = False + logger.warning( + f"[Plot] Chrome/Kaleido 不可用, 跳过 PNG 生成. " + f"运行 'plotly_get_chrome' 安装 Chrome 后可恢复. " + f"CSV 数据仍会正常保存." + ) + else: + raise + + + class DataSeries: def __init__( self, @@ -83,6 +111,11 @@ def print_series_stats( if y_name is None: y_name = self._y_name + + # 跳过非数值列的统计 | Skip statistics for non-numeric columns + if not pd.api.types.is_numeric_dtype(df[y_name]): + logger.debug(f"{plot_name}: {y_name} is non-numeric, skipping stats") + return logger.debug( f"{plot_name}: {y_name} stats:" @@ -108,6 +141,12 @@ def print_distribution_stats( if y_name is None: y_name = self._y_name + + # 跳过非数值列的统计 (如 pd_p2p_comm_dtype='fp8' 等字符串指标) + # Skip statistics for non-numeric columns (e.g., string metrics) + if not pd.api.types.is_numeric_dtype(df[y_name]): + logger.debug(f"{plot_name}: {y_name} is non-numeric, skipping stats") + return logger.debug( f"{plot_name}: {y_name} stats:" @@ -207,7 +246,8 @@ def plot_step( labels={"x": y_axis_label}, ) fig.update_traces(marker=dict(color="red", size=2)) - fig.write_image(f"{path}/{plot_name}.png") + # fig.write_image(f"{path}/{plot_name}.png") + _safe_write_image(fig, f"{path}/{plot_name}.png") self._save_df(df, path, plot_name) @@ -219,6 +259,11 @@ def plot_cdf(self, path: str, plot_name: str, 
y_axis_label: str = None) -> None: y_axis_label = self._y_name df = self._to_df() + + # 跳过非数值列 | Skip non-numeric columns + if not pd.api.types.is_numeric_dtype(df[self._y_name]): + self._save_df(df, path, plot_name) + return self.print_distribution_stats(df, plot_name) @@ -252,7 +297,8 @@ def plot_cdf(self, path: str, plot_name: str, y_axis_label: str = None) -> None: df, x=self._y_name, y="cdf", markers=True, labels={"x": y_axis_label} ) fig.update_traces(marker=dict(color="red", size=2)) - fig.write_image(f"{path}/{plot_name}.png") + # fig.write_image(f"{path}/{plot_name}.png") + _safe_write_image(fig, f"{path}/{plot_name}.png") self._save_df(df, path, plot_name) def plot_histogram(self, path: str, plot_name: str) -> None: @@ -260,6 +306,10 @@ def plot_histogram(self, path: str, plot_name: str) -> None: return df = self._to_df() + + # 跳过非数值列 | Skip non-numeric columns + if not pd.api.types.is_numeric_dtype(df[self._y_name]): + return self.print_distribution_stats(df, plot_name) @@ -292,7 +342,8 @@ def plot_histogram(self, path: str, plot_name: str) -> None: if self._save_plots: fig = px.histogram(df, x=self._y_name, nbins=25) - fig.write_image(f"{path}/{plot_name}.png") + # fig.write_image(f"{path}/{plot_name}.png") + _safe_write_image(fig, f"{path}/{plot_name}.png") def plot_differential(self, path: str, plot_name: str) -> None: if len(self._data_series) == 0: @@ -333,6 +384,7 @@ def plot_differential(self, path: str, plot_name: str) -> None: if self._save_plots: fig = px.line(df, x=self._x_name, y=differential_col_name, markers=True) fig.update_traces(marker=dict(color="red", size=2)) - fig.write_image(f"{path}/{plot_name}.png") + # fig.write_image(f"{path}/{plot_name}.png") + _safe_write_image(fig, f"{path}/{plot_name}.png") self._save_df(df, path, plot_name) diff --git a/vidur-alibabacloud/vidur/metrics/metrics_store.py b/vidur-alibabacloud/vidur/metrics/metrics_store.py index 88db5c82..601d038e 100644 --- a/vidur-alibabacloud/vidur/metrics/metrics_store.py 
+++ b/vidur-alibabacloud/vidur/metrics/metrics_store.py @@ -10,6 +10,7 @@ from vidur.entities import Batch, BatchStage, ExecutionTime, Request from vidur.logger import init_logger from vidur.metrics.cdf_sketch import CDFSketch +from vidur.metrics.data_series import _safe_write_image # qoder from vidur.metrics.constants import ( BatchMetricsCountDistribution, BatchMetricsTimeDistribution, @@ -300,6 +301,7 @@ def _store_bar_plot( labels={"x": x_label, "y": y_label}, ) fig.write_image(f"{base_path}/{plot_name}.png") + _safe_write_image(fig, f"{base_path}/{plot_name}.png") # qoder def _store_operation_metrics(self, base_plot_path: str): if not self._config.store_operation_metrics: @@ -369,7 +371,6 @@ def _store_operation_metrics(self, base_plot_path: str): def _store_request_metrics(self, base_plot_path: str): if not self._config.store_request_metrics: return - # import pdb; pdb.set_trace() # > debug all_request_metrics = list( self._request_metrics_time_distributions.values() ) + list(self._request_metrics_histogram.values()) diff --git a/vidur-alibabacloud/vidur/profiling/collectives/benchmark_runner.py b/vidur-alibabacloud/vidur/profiling/collectives/benchmark_runner.py index 05e921a4..354f3a04 100644 --- a/vidur-alibabacloud/vidur/profiling/collectives/benchmark_runner.py +++ b/vidur-alibabacloud/vidur/profiling/collectives/benchmark_runner.py @@ -86,8 +86,8 @@ def _init_communication( f"ip_addr: {ray.util.get_node_ip_address()}, CUDA_VISIBLE_DEVICES: {os.environ['CUDA_VISIBLE_DEVICES']}" ) - # TODO > 可以改成deepep后端 - # TODO > can be changed to deepep backend + # TODO(tianhao909): support DeepEP backend + # TODO(tianhao909): 支持 DeepEP 后端 torch.distributed.init_process_group( backend="nccl", rank=rank, diff --git a/vidur-alibabacloud/vidur/profiling/collectives/collectives_impl.py b/vidur-alibabacloud/vidur/profiling/collectives/collectives_impl.py index a159f602..477188bf 100644 --- a/vidur-alibabacloud/vidur/profiling/collectives/collectives_impl.py +++ 
b/vidur-alibabacloud/vidur/profiling/collectives/collectives_impl.py @@ -37,9 +37,8 @@ def __init__( dtype=dtype, device="cuda", ) - # TODO > elif collective == "all_to_all": + # TODO(tianhao909): add all_to_all collective support # elif collective == "all_to_all": - # # TODO > change _reduce_buffer to what? all to all buffer? # self._reduce_buffer = torch.empty( # size=(size * num_workers,), # dtype=dtype, @@ -61,7 +60,6 @@ def __init__( if not self._disable_graph: self._graph = self._build_graph() - # > self._num_workers = num_workers def _run_all_reduce(self): @@ -80,10 +78,11 @@ def _run_send_recv(self): torch.distributed.recv(self._buffer, 0) def _run_reduce_scatter(self): - # > torch.distributed function: def reduce_scatter_tensor(output, input, op=ReduceOp.SUM, group=None, async_op=False): + # torch.distributed function: reduce_scatter_tensor torch.distributed.reduce_scatter_tensor(self._buffer, self._reduce_buffer) - # TODO > modify according to def all_to_all(output_tensor_list, input_tensor_list, group=None, async_op=False): + # TODO(tianhao909): implement all_to_all collective + # TODO(tianhao909): 实现 all_to_all 集合通信 # Or use all_to_all_single first? 
# def all_to_all_single( # output, diff --git a/vidur-alibabacloud/vidur/scheduler/global_scheduler/splitwise_global_scheduler.py b/vidur-alibabacloud/vidur/scheduler/global_scheduler/splitwise_global_scheduler.py index dd00de19..12092564 100644 --- a/vidur-alibabacloud/vidur/scheduler/global_scheduler/splitwise_global_scheduler.py +++ b/vidur-alibabacloud/vidur/scheduler/global_scheduler/splitwise_global_scheduler.py @@ -1,6 +1,6 @@ from typing import List, Tuple, Dict -# from vidur.config import Config +from vidur.logger import init_logger from vidur.entities import Replica, Request from vidur.scheduler.global_scheduler.base_global_scheduler import BaseGlobalScheduler @@ -14,6 +14,8 @@ ReplicaSchedulerRegistry, ) +logger = init_logger(__name__) + # > from vidur.entities.task import Task, TaskType from vidur.entities.flow import Flow, FlowType @@ -21,8 +23,8 @@ from vidur.entities.replica import Replica, ReplicaType from vidur.entities.request import Request, RequestType -# TODO: > > 参考 sw写的;但也很多区别; 换一个名字; 类似pd分离的其他名字; 不严格是sw了 -# TODO: > > Refer to sw implementation; but there are many differences; need a new name; similar to pd separation; not strictly sw anymore +# TODO(tianhao909): rename class - not strictly Splitwise anymore, more like PD-separation scheduler +# TODO(tianhao909): 重命名类,已不严格是 Splitwise,更像 PD 分离调度器 class SplitwiseGlobalScheduler(BaseGlobalScheduler): # Splitwise Global Scheduler. 
def __init__(self, config: SimulationConfig, replicas: Dict[int, Replica]): # Call parent class initialization method @@ -33,12 +35,14 @@ def __init__(self, config: SimulationConfig, replicas: Dict[int, Replica]): self._replicas = replicas # Save replica dictionary as instance private attribute, key is replica ID, value is replica object self._num_replicas = len(self._replicas) # Calculate and save total number of replicas - # TODO > improve pd_node_ratio + # TODO(tianhao909): make pd_node_ratio configurable + # TODO(tianhao909): 优化 pd_node_ratio 的配置方式 # self.pd_node_ratio = 0.5 self.pd_node_ratio = self._replicas[0].pd_node_ratio - assert self.pd_node_ratio >= 0 and self.pd_node_ratio <= 1, "> Debug: pd_node_ratio must be between 0 and 1." + assert self.pd_node_ratio >= 0 and self.pd_node_ratio <= 1, "pd_node_ratio must be between 0 and 1" # self._sub_scheduler = self._config.splitwise_scheduler_sub_scheduler # Get sub-scheduler type from configuration - # TODO > improve _sub_scheduler flexible choice + # TODO(tianhao909): make _sub_scheduler configurable + # TODO(tianhao909): 优化 _sub_scheduler 的灵活选择 # self._sub_scheduler = "round_robin" self._sub_scheduler = "lor" @@ -148,14 +152,11 @@ def __init__(self, config: SimulationConfig, replicas: Dict[int, Replica]): # self.mixed_instances = [] # Mixed instance list (can handle prompts and tokens) # self.token_instances = [] # Token instance list - # TODO : > 增加到输入或者仿真里面 - # fy 这个需要是一个入参 从config里面读取 - # TODO : > Add to input or simulation - # fy This needs to be an input parameter read from config + # TODO(tianhao909): add transfer_bandwidth to config input + # TODO(tianhao909): 增加到输入或仿真配置中,从 config 读取 self.transfer_bandwidth = 0 self.transfer_bandwidth = 200 * 1024**3 # Gbps转换为bps - # > self.p_request_counter = 0 self.d_request_counter = 0 @@ -346,7 +347,7 @@ def schedule(self) -> List[Tuple[int, Request]]: # Execute scheduling logic met prefill_replica = None - + # fth 目前p req 进入 p replica的 lor 策略: # vidur's lor: # 
replica_id = min(pending_prefill_requests_map.items(), key=lambda x: x[1])[0] @@ -373,21 +374,46 @@ def schedule(self) -> List[Tuple[int, Request]]: # Execute scheduling logic met request.decode_replica_id = replica_id decode_request_mapping.append((decode_replica.id, request)) - # TODO fy: 没有用的 task dag等相关代码都可以删掉; 优先级相对低 - # task继承 req; 或者让task能构造req; - # TODO fy: Unused task dag related code can be deleted; relatively low priority + # TODO(tianhao909): remove unused task DAG code (low priority) + # TODO(tianhao909): 删除未使用的 task DAG 相关代码(优先级较低) # task inherits from req; or let task construct req + # task 继承 req;或者让 task 能构造 req + ''' if prefill_replica != decode_replica: # If prompt instance and token instance are different + # ============================================================ + # [冗余代码分析] add_to_pool + DAG + sched_* 运行时验证 + # + # 以下代码在 Splitwise PD分离 实际调度流程中并不影响核心逻辑: + # 1. add_to_pool(): 将request加入replica.pending_requests + # - prefill端: batch_end_event.py 会从中remove,但不影响调度 + # - decode端: 从未被消费,纯冗余 + # 2. request DAG (prefill_task, decode_task): 创建了DAG图, + # 但_get_next_batch()使用的是_request_queue,不读取DAG + # 3. add_kv_cache_transfer(): 构建了flow node,但模拟不使用 + # 4. 
sched_memory/sched_pending_tokens: 设置但从未被读取用于调度 + # + # 保留这些代码以兼容可能的上层逻辑,但标注为冗余。 + # ============================================================ + logger.debug(f"[Redundant code check (冗余代码验证)] schedule(): " + f"req={request.id}, p_replica={prefill_replica.id}, d_replica={decode_replica.id}") + logger.debug(f" add_to_pool(prefill_task): added to p_replica.pending_requests " + f"(加入 p_replica.pending_requests, len={len(prefill_replica.pending_requests)})") + logger.debug(f" add_to_pool(decode_task): added to d_replica.pending_requests " + f"(加入 d_replica.pending_requests, len={len(decode_replica.pending_requests)}) [redundant, 冗余]") prefill_replica.add_to_pool(prefill_task) - decode_replica.add_to_pool(decode_task) + # decode_replica.add_to_pool(decode_task) + decode_replica.add_to_pool(decode_task) # [冗余] decode端pending_requests从未被消费 + # [冗余] 以下KV cache transfer/DAG操作不影响实际调度 # 在实例之间传输KV缓存 # Transfer KV cache between instances self.add_kv_cache_transfer(request, prefill_replica, decode_replica, self.transfer_bandwidth) + + # [冗余] sched_memory 设置但从未被核心调度逻辑读取 prefill_replica.sched_memory += prefill_task.max_memory(prefill_replica) # Update prompt instance memory usage decode_replica.sched_memory += prefill_task.max_memory(decode_replica) + \ decode_task.max_memory(decode_replica) # Update token instance memory usage @@ -398,10 +424,11 @@ def schedule(self) -> List[Tuple[int, Request]]: # Execute scheduling logic met prefill_replica.sched_memory += prefill_task.max_memory(prefill_replica) + \ decode_task.max_memory(prefill_replica) # Update instance memory usage prefill_task.chain = [decode_task] # Set token task as successor of prompt task - + + # [冗余] sched_pending_tokens 设置但从未被核心调度逻辑读取 prefill_replica.sched_pending_tokens += prefill_task.prompt_size # Update prompt instance pending token count decode_replica.sched_pending_tokens += 1 # Update token instance pending token count - + ''' # > for req in requests_to_remove: self._request_queue.remove(req) diff --git 
a/vidur-alibabacloud/vidur/scheduler/replica_scheduler/splitwise_replica_scheduler.py b/vidur-alibabacloud/vidur/scheduler/replica_scheduler/splitwise_replica_scheduler.py index ff32aa57..5219f68a 100644 --- a/vidur-alibabacloud/vidur/scheduler/replica_scheduler/splitwise_replica_scheduler.py +++ b/vidur-alibabacloud/vidur/scheduler/replica_scheduler/splitwise_replica_scheduler.py @@ -19,7 +19,6 @@ from vidur.scheduler.utils.memory_planner import MemoryPlanner from vidur.scheduler.replica_scheduler.base_replica_scheduler import BaseReplicaScheduler -# > from collections import defaultdict import sys from vidur.entities.node import NodeState, Node @@ -84,8 +83,8 @@ def __init__( - # TODO: > > 没用到了 没有用的代码删一下 - # TODO: > > not used anymore - delete unused code + # TODO(tianhao909): delete unused prompt/token task tracking code + # TODO(tianhao909): 删除未使用的 prompt/token task 跟踪代码 self.prompt_tasks_in_batch = [] self.token_tasks_in_batch = [] @@ -110,15 +109,15 @@ def __init__( # 按到达时间排序的待处理请求列表 # pending requests (not tasks) ordered by arrival time - # TODO: > 看看需不需要删 - # TODO: > check if needs deletion + # TODO(tianhao909): evaluate if pending_requests can be removed + # TODO(tianhao909): 看看需不需要删除 pending_requests self.pending_requests = [] # 专门用于提示任务的待处理队列(优先处理提示) # separate pending queue for prompt tasks (to prioritize prompts) - # TODO > 删一下冗余的 - # TODO > remove redundant items + # TODO(tianhao909): remove redundant prompt queue + # TODO(tianhao909): 删除冗余的 prompt 队列 self.pending_prompt_queue = [] # 请求到任务的映射关系 @@ -199,22 +198,46 @@ def _allocate_request(self, request: Request) -> None: self.allocate(request.id, 1) # Allocate one additional memory block + # fth 260122 这边释放资源; 进入decode的逻辑 写在handle_event里面,用于生成新的event, 这边不生成event; def on_batch_end(self, batch: Batch) -> None: # Called when a batch finishes execution self._num_running_batches -= 1 # Decrement running batch count # 判断是否是pd 分离 # Check if PD separation is enabled if self.replica.replica_type == 
ReplicaType.MIXED: - assert False, "> debug, PD separation doesn't support mixed yet, must be separated" + assert False, "PD separation doesn't support mixed mode yet, must be separated" pass elif self.replica.replica_type == ReplicaType.PREFILL: for request in batch.requests: if request.completed: self.free(request.id) + + + # 在移除请求之前,先计算当前的kvcache使用情况 + logger.debug(f"Before removing request {request.id}:") + self.replica.get_remaining_kv_cache_capacity() + # 移除请求 留给batch_end_event.py去做 + # self.replica.pending_requests.remove(request) + # 移除请求后释放相应的显存 + self.replica.release_request_kv_cache_memory(request) + logger.debug(f"Request {request.id} removed from replica and GPU memory released") + self.replica.get_remaining_kv_cache_capacity() + elif request.is_prefill_complete == True: # 通过 request 找到对应 decode replica; # Find corresponding decode replica through request; self.free(request.id) + + # 在移除请求之前,先计算当前的kvcache使用情况 + logger.debug(f"Before removing request {request.id}:") + self.replica.get_remaining_kv_cache_capacity() + # 移除请求 留给batch_end_event.py去做 + # self.replica.pending_requests.remove(request) + # 移除请求后释放相应的显存 + self.replica.release_request_kv_cache_memory(request) + logger.debug(f"Request {request.id} removed from replica and GPU memory released") + self.replica.get_remaining_kv_cache_capacity() + d_replica_scheduler = request.global_scheduler.get_replica_scheduler(request.decode_replica_id) # d_replica_scheduler._preempted_requests.append(request) d_replica_scheduler._request_queue.append(request) @@ -226,17 +249,28 @@ def on_batch_end(self, batch: Batch) -> None: # Called when a batch finishes ex # vllm 和 sarathi的 free方法 和 orca的 free 方法不同 # vllm and sarathi free methods differ from orca's free method self.free(request.id) + + # 在移除请求之前,先计算当前的kvcache使用情况 + logger.debug(f"Before removing request {request.id}:") + self.replica.get_remaining_kv_cache_capacity() + # 移除请求 留给batch_end_event.py去做 + # self.replica.pending_requests.remove(request) + # 
移除请求后释放相应的显存 + self.replica.release_request_kv_cache_memory(request) + logger.debug(f"Request {request.id} removed from replica and GPU memory released") + self.replica.get_remaining_kv_cache_capacity() + + + elif request.is_prefill_complete == True: self._preempted_requests.append(request) elif request.is_prefill_complete == False: - assert request.decode_arrived_at == float("inf"), "> debug" + assert request.decode_arrived_at == float("inf"), "decode_arrived_at must be infinity for incomplete prefill" - # > 用orca写一版get next batch。 - # > implement get next batch using orca approach. - # @abstractmethod # Mark as abstract method, requiring subclass implementation + # Implement get next batch using orca approach. def _get_next_batch(self) -> Batch: """ @@ -249,15 +283,10 @@ def _get_next_batch(self) -> Batch: Return: List of preempted tasks, List of new tasks """ - # print(f"> Debug: key: sw replica scheduler is forming batch _get_next_batch") - - requests = [] # Store requests to be processed in this batch num_tokens = [] # Store token counts for corresponding requests num_batch_tokens = 0 # Total tokens in current batch - # print(f"> Debug: entering _get_next_batch replica_id={self.replica._id} replica type={self.replica.replica_type}") - if self.replica.replica_type == ReplicaType.MIXED: pass elif self.replica.replica_type == ReplicaType.PREFILL: @@ -265,7 +294,6 @@ def _get_next_batch(self) -> Batch: # Request popping; original request popping didn't put them back, # batch; tmp_requests_to_remove = list() # Record requests to be removed from queue - # print(f"> Debug: entering PREFILL replica path len(self._request_queue)={len(self._request_queue)} len(self._preempted_requests)={len(self._preempted_requests)} ") # 对于batch end 加回来的请求(默认之前的放得下)# (没完成) # 因此 batch end 不能把完成p的request 放回p replica; 但可以放到 d replica中; 不过目前逻辑不需要放入d replica中 # For requests added back by batch end (assuming previous ones fit) # (not completed) # Therefore batch end cannot put completed p 
requests back to p replica; but can put them in d replica; however current logic doesn't require putting them in d replica @@ -278,37 +306,27 @@ def _get_next_batch(self) -> Batch: requests.append(request) # Add to request list num_tokens.append(next_num_tokens) # Record token count - # TODO: > > 能过去才能把kvcache 传递过去; 逻辑需要check一下;一些极端case,kvcache和req目前在d那边排队, 先传后排; 有可能实际是先排队 然后p2p传输 - # TODO: > > 有空再做:显存池; 判断大家的空间够不够; - # TODO: > > can only pass if space available; logic needs checking; for extreme cases, kvcache and req currently queued at d side, transmitted out of order; might actually queue first then p2p transmit - # TODO: > > do when free: GPU memory pool; check if everyone has enough space; + # TODO(tianhao909): implement GPU memory pool with space validation for KV cache transfer + # TODO(tianhao909): 实现显存池,判断空间是否足够再传递 KV cache + # TODO(tianhao909): handle extreme cases where kvcache queued at decode side before p2p transfer # For unprocessed requests; for request in self._request_queue: - # print(f"> Debug: entering PREFILL replica path _get_next_batch req id ={request.id} still in _req_queue, req.is_prefill_complete={request.is_prefill_complete} req type={request.request_type}, replica_id={self.replica._id} replica type={self.replica.replica_type}" ) - # print(f"> Debug: request id ={request.id} , request_type={request.request_type} _arrived_at={request._arrived_at} num_processed_tokens={request.num_processed_tokens} _latest_iteration_completed_at={request._latest_iteration_completed_at} _latest_iteration_scheduled_at={request._latest_iteration_scheduled_at}") if request.request_type == RequestType.PREFILL and request.is_prefill_complete == False: - # print(f"> Debug: entering PREFILL replica path num_batch_tokens={num_batch_tokens} next_num_tokens={next_num_tokens} max_tokens_in_batch={self._config.max_tokens_in_batch}") # 组batch + 判断到达时间 # Form batch + Check arrival time - # print(f"> Debug: request id ={request.id} , request_type={request.request_type} 
_arrived_at={request._arrived_at} num_processed_tokens={request.num_processed_tokens} _latest_iteration_completed_at={request._latest_iteration_completed_at} _latest_iteration_scheduled_at={request._latest_iteration_scheduled_at}") - next_num_tokens = self._get_request_next_num_tokens(request) # Get next token count needed by this request assert next_num_tokens == request.num_prefill_tokens if num_batch_tokens + next_num_tokens > self._config.max_tokens_in_batch: # If total batch tokens plus current request tokens exceed limit - # print(f"> Debug: break1: num_batch_tokens={num_batch_tokens} next_num_tokens={next_num_tokens} max_tokens_in_batch={self._config.max_tokens_in_batch}") - # print(f"> Debug: break1: investigate why {self.replica._id} prefill replica has request pileup, can't form batch for request {request._id}") break if len(self._allocation_map) == self._config.batch_size_cap: # If allocation map size reaches batch capacity limit - # print(f"> Debug: break2: investigate why {self.replica._id} prefill replica has request pileup, can't form batch for request {request._id}") break if len(requests) == self._max_micro_batch_size: # If request list size reaches maximum micro-batch size - # print(f"> Debug: break3: investigate why {self.replica._id} prefill replica has request pileup, can't form batch for request {request._id}") break # vllm sarathi method @@ -317,19 +335,23 @@ def _get_next_batch(self) -> Batch: # orca method if not self.can_allocate(self._max_blocks_per_sequence): - # print(f"> Debug: break4: investigate why {self.replica._id} prefill replica has request pileup, can't form batch for request {request._id}") break - # > pop(0) 不能写在遍历队列的循环里面, 遍历完之后再写 - # > pop(0) cannot be written inside queue iteration loop, write after iteration completes + # pop(0) cannot be written inside queue iteration loop, write after iteration completes # request = self._request_queue.pop(0) # Remove and get first request from request queue - # >: vllm and sarathi 
allocation approach in vidur: + # vllm and sarathi allocation approach in vidur: # self._allocate_request(request) # Allocate request resources - # >: orca allocation approach in vidur: allocate maximum blocks for request + # orca allocation approach in vidur: allocate maximum blocks for request self.allocate(request.id, self._max_blocks_per_sequence) + # self.replica.allocate_request_kv_cache_memory(request, self._max_blocks_per_sequence) + # Pass block_size to correctly convert blocks to tokens + # 传入 block_size,正确将 blocks 转为 tokens + self.replica.allocate_request_kv_cache_memory( + request, self._max_blocks_per_sequence, self._config.block_size) + requests.append(request) # Add request to request list tmp_requests_to_remove.append(request) @@ -338,29 +360,20 @@ def _get_next_batch(self) -> Batch: num_batch_tokens += next_num_tokens # Update total batch tokens elif request.request_type == RequestType.DECODE: - # assert request.request_type == RequestType.DECODE, "> Debug: shouldn't have already popped" continue if not requests: - # print(f"> Debug: failed to form prefill batch self.replica.replica_type={self.replica.replica_type} self.replica._id={self.replica._id} self._replica_id={self._replica_id}, req count={len(requests)}, num_tokens={num_tokens}") return else: - # assert len(tmp_requests_to_remove) == len(requests) ,'> Debug: popped and appended lengths must match' # 遍历完成后,从_request_queue中移除已处理的请求 # After iteration completes, remove processed requests from _request_queue for request in tmp_requests_to_remove: self._request_queue.remove(request) - # 看batch id 里面的req id ; - # Check batch id for req ids; - # for req in requests: - # print(f"> Debug: req id = {req._id}") - # print(f"> Debug: formed prefill batch self.replica.replica_type={self.replica.replica_type} self.replica._id={self.replica._id} self._replica_id={self._replica_id}, req count={len(requests)}, num_tokens={num_tokens}") return Batch(self._replica_id, requests, num_tokens) # Create and return 
Batch object elif self.replica.replica_type == ReplicaType.DECODE: tmp_requests_to_remove = list() - # print(f"> Debug: entering DECODE replica path len(self._request_queue)={len(self._request_queue)} len(self._preempted_requests)={len(self._preempted_requests)}") # 对于batch end 加回来的请求(默认之前的放得下)# (没完成) # 因此 batch end 不能把完成p的request 放回p replica; 但可以放到 d replica中; 不过目前逻辑不需要放入d replica中 # For requests added back by batch end (assuming previous ones fit) # (not completed) # Therefore batch end cannot put completed p requests back to p replica; but can put them in d replica; however current logic doesn't require putting them in d replica @@ -373,9 +386,7 @@ def _get_next_batch(self) -> Batch: requests.append(request) # Add to request list num_tokens.append(next_num_tokens) # Record token count - # assert len(self._request_queue) == 0, "> debug, > cannot let decode replica have reqs in _request_queue initially" for request in self._request_queue: - # print(f"> Debug: entering DECODE replica path _get_next_batch req id ={request.id} still in _req_queue, req.is_prefill_complete={request.is_prefill_complete} req type={request.request_type}, replica_id={self.replica._id} replica type={self.replica.replica_type}" ) if request.request_type == RequestType.PREFILL: continue @@ -386,22 +397,21 @@ def _get_next_batch(self) -> Batch: if request.is_prefill_complete == True: # 组batch + 判断到达时间 # Form batch + Check arrival time - assert request.decode_arrived_at != float('inf'), "> Debug: check if decode_arrived_at timing has been properly modified" + assert request.decode_arrived_at != float('inf'), "decode_arrived_at must be set before decode batching" # if request.decode_arrived_at == float('inf'): # continue - # > vllm sarathi orca都是这个 获取该请求下一次需要的token数量 - # > vllm sarathi orca all use this to get next token count needed by request + # vllm sarathi orca all use this to get next token count needed by request next_num_tokens = self._get_request_next_num_tokens(request) # decode 
next_num_tokens can only be 1 - assert next_num_tokens == 1, "> Debug: decode next_num_tokens must be 1" + assert next_num_tokens == 1, "decode next_num_tokens must be 1" # 如果批处理token总数加上当前请求token数超过最大限制 # If total batch tokens plus current request tokens exceed limit if num_batch_tokens + next_num_tokens > self._config.max_tokens_in_batch: - print(f"> Debug: break: num_batch_tokens + next_num_tokens > self._config.max_tokens_in_batch") + logger.debug("break: num_batch_tokens + next_num_tokens > max_tokens_in_batch") break # sarathi、vllm的方法:如果分配映射大小达到批处理容量上限 @@ -428,6 +438,13 @@ def _get_next_batch(self) -> Batch: # orca的方法: 为请求分配最大块数的资源 # orca method: allocate maximum blocks for request self.allocate(request.id, self._max_blocks_per_sequence) + # Pass block_size to correctly convert blocks to tokens + # 传入 block_size,正确将 blocks 转为 tokens + self.replica.allocate_request_kv_cache_memory( + request, self._max_blocks_per_sequence, self._config.block_size) + + # self.replica.allocate_request_kv_cache_memory(request, self._max_blocks_per_sequence) + requests.append(request) # Add request to request list tmp_requests_to_remove.append(request) # Determine token count; prefill tokens @@ -438,29 +455,11 @@ def _get_next_batch(self) -> Batch: continue if requests: - # print(f"> Debug: formed decode batch replica_id={self._replica_id}, req count={len(requests)}, num_tokens={num_tokens}") - # assert len(tmp_requests_to_remove) == len(requests) ,' > popped and appended lengths must match' - # After iteration completes, remove processed requests from _request_queue # 遍历完成后,从_request_queue中移除已处理的请求 for request in tmp_requests_to_remove: self._request_queue.remove(request) - # Check batch id for req ids; - # for req in requests: - # print(f"> Debug: req id = {req._id}") - # print(f"> Debug: formed decode batch self.replica.replica_type={self.replica.replica_type} self.replica._id={self.replica._id} self._replica_id={self._replica_id}, req count={len(requests)}, 
num_tokens={num_tokens}") return Batch(self._replica_id, requests, num_tokens) # Create and return Batch object if not requests: - # print(f"> Debug: failed to form decode batch, checking self._preempted_requests queue replica_id={self._replica_id}, req count={len(requests)}, num_tokens={num_tokens}") - - return - - - - - - - - - \ No newline at end of file + return \ No newline at end of file diff --git a/vidur-alibabacloud/vidur/scheduler/replica_stage_scheduler/replica_stage_schduler.py b/vidur-alibabacloud/vidur/scheduler/replica_stage_scheduler/replica_stage_schduler.py index 6a153945..4bcafd6b 100644 --- a/vidur-alibabacloud/vidur/scheduler/replica_stage_scheduler/replica_stage_schduler.py +++ b/vidur-alibabacloud/vidur/scheduler/replica_stage_scheduler/replica_stage_schduler.py @@ -42,11 +42,10 @@ def on_schedule(self) -> Tuple[Batch, BatchStage, ExecutionTime]: self._is_busy = True batch = self._batch_queue.pop(0) - # 模拟micro-batch在PP stage上的执行 - # TODO: 这块接入simai - # Simulate micro-batch execution on PP stage - # TODO: Integrate with simai + # 模拟 micro-batch 在 PP stage 上的执行 + # TODO(tianhao909): integrate with SimAI for execution time prediction + # TODO(tianhao909): 接入 SimAI 获取执行时间预测 execution_time = self._execution_time_predictor.get_execution_time( batch, self._stage_id, diff --git a/vidur-alibabacloud/vidur/scheduler/utils/memory_planner.py b/vidur-alibabacloud/vidur/scheduler/utils/memory_planner.py index e769e7b1..312e5a38 100644 --- a/vidur-alibabacloud/vidur/scheduler/utils/memory_planner.py +++ b/vidur-alibabacloud/vidur/scheduler/utils/memory_planner.py @@ -1,24 +1,127 @@ from vidur.config import ReplicaConfig from vidur.entities.replica import Replica +from vidur.logger import init_logger from vidur.utils.param_counter import ParamCounter +logger = init_logger(__name__) + class MemoryPlanner: def __init__(self, replica_config: ReplicaConfig, replica: Replica) -> None: self._param_counter = ParamCounter(replica_config) self._replica = replica + 
self._replica_config = replica_config + # TODO(tianhao909): support FP8 precision quantization + # TODO(tianhao909): 支持 FP8 精度量化 + if self._replica_config.pd_p2p_comm_dtype == "fp8": + logger.debug(f"FP8 enabled, dtype={self._replica_config.pd_p2p_comm_dtype}") + self.use_fp8 = True + else: + logger.debug(f"FP8 disabled, dtype={self._replica_config.pd_p2p_comm_dtype}") + self.use_fp8 = False + self.tp = self._replica_config.tensor_parallel_size + self.ep = self._replica_config.expert_model_parallel_size - def _get_kv_cache_memory_per_layer_per_request(self) -> int: - return ( - 2 # 2 bytes per float - * 2 # one for key, one for value - * self._replica.attention_head_dim - * self._replica.kv_heads_per_tensor_parallel_worker - * self._replica.max_request_tokens + # refer to https://github.com/alibaba/InferSim/blob/main/kvcache/kvcache.py + def get_mha_kvcache_size(self, config, use_fp8): + """ + Calculate MHA/GQA KV cache size (bytes) for all layers. + 计算所有层的 MHA/GQA KV Cache 大小(字节) + + Args: + config: Model config object / 模型配置对象 + use_fp8: Whether to use FP8 precision / 是否使用 FP8 精度 + + Returns: + int: Total KV cache size in bytes / KV Cache 总大小(字节) + """ + # 2 for K and V; layers * KV heads * head_dim + # 2 表示 K 和 V 两种缓存 + kvcache_size = ( + 2 * config.num_hidden_layers * config.num_key_value_heads * config.head_dim + ) + if not use_fp8: + # FP16/BF16 uses 2x bytes vs FP8 / FP16/BF16 比 FP8 多一倍字节 + kvcache_size *= 2 + return kvcache_size + + # refer to https://github.com/alibaba/InferSim/blob/main/kvcache/kvcache.py + # TODO(tianhao909): verify how TP splits MLA head_dim + # TODO(tianhao909): 核实 TP 如何切分 MLA 的 head_dim + def get_mla_kvcache_size(self, config, use_fp8): + """ + Calculate MLA KV cache size (bytes) for all layers. + 计算所有层的 MLA KV Cache 大小(字节) + + MLA uses kv_lora_rank + qk_rope_head_dim instead of full KV heads. 
+ MLA 使用 kv_lora_rank + qk_rope_head_dim 代替完整 KV 头 + + Args: + config: Model config object / 模型配置对象 + use_fp8: Whether to use FP8 precision / 是否使用 FP8 精度 + + Returns: + int: Total KV cache size in bytes / KV Cache 总大小(字节) + """ + kvcache_size = config.num_hidden_layers * ( + config.kv_lora_rank + config.qk_rope_head_dim ) + if not use_fp8: + # FP16/BF16 uses 2x bytes vs FP8 / FP16/BF16 比 FP8 多一倍字节 + kvcache_size *= 2 + return kvcache_size + + # TODO(tianhao909): re-verify KV cache calc for DeepSeek/Qwen3 + # TODO(tianhao909): 重新核实 DeepSeek/Qwen3 的 KV Cache 计算方法 + # TODO(tianhao909): use per-request prefill/decode seq_len for PD separation + # TODO(tianhao909): PD 分离时传入不同 req 的 prefill_input_seq_len 和 decode_output_seq_len + # TODO(tianhao909): optimize from static to dynamic token allocation + # TODO(tianhao909): 目前按 max_request_tokens 静态分配,后续可优化为动态分配 + def _get_kv_cache_memory_per_layer_per_request(self) -> int: + """Calculate KV cache memory per layer per request (bytes). + 计算每层每请求的 KV Cache 内存(字节)""" + # Currently only DeepSeek-671B uses MLA KV cache + # 当前仅 DeepSeek-671B 使用 MLA KV Cache + if self._replica_config.model_name in ['deepseek-671B']: + kvcache_size_all_layers_per_token = self.get_mla_kvcache_size(self._replica_config.model_config, self.use_fp8) + kvcache_size_per_layer_per_token = kvcache_size_all_layers_per_token / self._replica_config.model_config.num_hidden_layers + prefill_input_seq_len = 1 + decode_output_seq_len = 1 + kvcache_size_per_layer_per_req = kvcache_size_per_layer_per_token * (prefill_input_seq_len + decode_output_seq_len) + return kvcache_size_per_layer_per_req + elif self._replica_config.model_name in ['qwen3-moe-235B', 'qwen3-next-80B']: + kvcache_size_all_layers_per_token = self.get_mha_kvcache_size(self._replica_config.model_config, self.use_fp8) + kvcache_size_per_layer_per_token = kvcache_size_all_layers_per_token / self._replica_config.model_config.num_hidden_layers + prefill_input_seq_len = 1 + decode_output_seq_len = 1 + 
kvcache_size_per_layer_per_req = kvcache_size_per_layer_per_token * (prefill_input_seq_len + decode_output_seq_len) + return kvcache_size_per_layer_per_req + + else: + # TP shard fallback / TP 切分回退 + # self._kv_heads_per_tensor_parallel_worker = ceil( + # self._model_config.num_kv_heads / self._replica_config.tensor_parallel_size + # ) + + return ( + 2 # 2 bytes per float + * 2 # one for key, one for value + * self._replica.attention_head_dim + * self._replica.kv_heads_per_tensor_parallel_worker + * self._replica.max_request_tokens + ) + # TODO(tianhao909): split get_num_parameters_per_device for P and D replicas + # TODO(tianhao909): 也要 P 和 D 分开适配,分开获得 get_num_parameters_per_device def _get_parameter_memory_per_device(self) -> int: - return 2 * self._param_counter.get_num_parameters_per_device() + """Get model parameter memory per device (bytes or GB for new models). + 获取每设备模型参数内存(字节,新模型返回 GB)""" + # New models return params in GB instead of count + # 三种新模型返回参数 GB 而不是参数个数 + if self._replica_config.model_name in ['deepseek-671B', 'qwen3-moe-235B', 'qwen3-next-80B']: + return self._param_counter.get_num_parameters_per_device() + else: + return 2 * self._param_counter.get_num_parameters_per_device() def _get_kv_cache_memory_per_device_per_request(self) -> int: return ( @@ -26,26 +129,183 @@ def _get_kv_cache_memory_per_device_per_request(self) -> int: ) def get_max_batch_size(self) -> int: - available_memory = ( + """ + Calculate maximum batch size based on GPU memory budget. + 根据 GPU 显存预算计算最大批处理大小 + + Formula / 计算公式: + 1. available_memory = total_GPU_memory * (1 - memory_margin_fraction) + 2. memory_for_kv_cache = available_memory - parameter_memory + 3. 
number_of_requests = memory_for_kv_cache // kv_cache_per_request + + For PD disaggregation / 对于 PD 分离架构: + - Prefill cluster: fewer params (larger EP), more KV cache memory + Prefill 集群: 参数量较小(EP较大), KV cache 可用内存较多 + - Decode cluster: more params (smaller EP), less KV cache memory + Decode 集群: 参数量较大(EP较小), KV cache 可用内存较少 + + Returns: + int: Maximum concurrent requests / 可同时处理的最大请求数 + """ + # ===== 1. Calculate available GPU memory / 计算GPU可用内存 ===== + available_memory_bytes = ( self._replica.total_memory_gb * 1024**3 * (1 - self._replica.memory_margin_fraction) ) - parameter_memory_per_device = self._get_parameter_memory_per_device() - kv_cache_memory_per_device_per_request = ( - self._get_kv_cache_memory_per_device_per_request() - ) + available_memory_gb = available_memory_bytes / (1024**3) + + if self._replica_config.model_name in ['deepseek-671B', 'qwen3-moe-235B', 'qwen3-next-80B']: + # ===== 2. Get model parameter memory (unit: Bytes) ===== + # Returns triple: (total_param_mem, prefill_param_mem, decode_param_mem) + total_param_mem, prefill_param_mem, decode_param_mem = self._get_parameter_memory_per_device() + + # ===== 3. Get per-request KV cache memory / 获取每请求KV cache内存 ===== + kv_cache_per_request = self._get_kv_cache_memory_per_device_per_request() - memory_for_kv_cache = available_memory - parameter_memory_per_device - number_of_requests = ( - memory_for_kv_cache // kv_cache_memory_per_device_per_request - ) + # ===== 4. Calculate KV cache available memory ===== + # Note: must use per-phase param memory, not total + # 注意: 必须使用各阶段各自的参数内存, PD分离下EP不同导致参数量不同 + prefill_kv_cache_memory = available_memory_bytes - prefill_param_mem # Prefill KV cache available + decode_kv_cache_memory = available_memory_bytes - decode_param_mem # Decode KV cache available + + # ===== 5. 
Calculate max supported requests ===== + # If KV cache memory is negative, model params exceed GPU memory + if prefill_kv_cache_memory > 0: + prefill_num_requests = int(prefill_kv_cache_memory // kv_cache_per_request) + else: + prefill_num_requests = 0 # OOM, set to 0 + + if decode_kv_cache_memory > 0: + decode_num_requests = int(decode_kv_cache_memory // kv_cache_per_request) + else: + decode_num_requests = 0 # OOM, set to 0 + + # ===== 6. 打印详细调试信息 | Print detailed debug info ===== + logger.info("\n" + "="*80) + logger.info("[MemoryPlanner] GPU Memory Allocation (GPU内存分配详情):") + logger.info("="*80) + logger.info(f" Total GPU mem (GPU总内存): {self._replica.total_memory_gb:.2f} GB") + logger.info(f" Mem margin (内存保留比例): {self._replica.memory_margin_fraction*100:.1f}%") + logger.info(f" Available mem (可用内存): {available_memory_gb:.2f} GB") + logger.info("-"*80) + logger.info(f" Total param mem (总参数内存): {total_param_mem / (1024**3):.2f} GB") + logger.info(f" Prefill param mem (Prefill参数内存): {prefill_param_mem / (1024**3):.2f} GB") + logger.info(f" Decode param mem (Decode参数内存): {decode_param_mem / (1024**3):.2f} GB") + logger.info("-"*80) + logger.info(f" Prefill KV cache avail (Prefill可用内存): {prefill_kv_cache_memory / (1024**3):.2f} GB") + logger.info(f" Decode KV cache avail (Decode可用内存): {decode_kv_cache_memory / (1024**3):.2f} GB") + logger.info(f" Per-req KV cache (每请求KV cache): {kv_cache_per_request / (1024**3):.6f} GB") + logger.info("-"*80) + logger.info(f" Prefill max reqs (Prefill最大请求数): {prefill_num_requests}") + logger.info(f" Decode max reqs (Decode最大请求数): {decode_num_requests}") + logger.info("="*80 + "\n") + + # ===== 7. 
OOM check and error handling / 内存不足检查与错误处理 ===== + # Check Prefill cluster memory / 检查Prefill集群内存 + if prefill_param_mem > available_memory_bytes: + logger.error(f"Prefill cluster OOM (Prefill集群内存不足)!") + logger.error(f" Param mem needed (需要参数内存): {prefill_param_mem / (1024**3):.2f} GB") + logger.error(f" Available mem (可用内存): {available_memory_gb:.2f} GB") + logger.error(f" Deficit (内存缺口): {(prefill_param_mem - available_memory_bytes) / (1024**3):.2f} GB") + logger.error(f"[Suggestion] Increase TP/EP, use larger GPU, or enable FP8") + + # Check Decode cluster memory / 检查Decode集群内存 + if decode_param_mem > available_memory_bytes: + logger.error(f"Decode cluster OOM (Decode集群内存不足)!") + logger.error(f" Param mem needed (需要参数内存): {decode_param_mem / (1024**3):.2f} GB") + logger.error(f" Available mem (可用内存): {available_memory_gb:.2f} GB") + logger.error(f" Deficit (内存缺口): {(decode_param_mem - available_memory_bytes) / (1024**3):.2f} GB") + logger.error(f"[Suggestion] Increase TP/EP, use larger GPU, or enable FP8") + + # Assert: at least one request must fit / 断言: 至少能处理一个请求 + assert prefill_num_requests > 0, ( + f"Prefill cluster OOM! param_mem({prefill_param_mem/(1024**3):.2f}GB) > " + f"available({available_memory_gb:.2f}GB), increase parallelism or use quantization" + ) + assert decode_num_requests > 0, ( + f"Decode cluster OOM! 
param_mem({decode_param_mem/(1024**3):.2f}GB) > " + f"available({available_memory_gb:.2f}GB), increase parallelism or use quantization" + ) + + # Return prefill max requests as system upper bound + # 返回Prefill的最大请求数 (作为系统上限) + return int(prefill_num_requests) + else: + parameter_memory_per_device = self._get_parameter_memory_per_device() + kv_cache_memory_per_device_per_request = ( + self._get_kv_cache_memory_per_device_per_request() + ) - assert ( - number_of_requests > 0 - ), "Not enough memory to store even a single request" + memory_for_kv_cache = available_memory_bytes - parameter_memory_per_device + number_of_requests = ( + memory_for_kv_cache // kv_cache_memory_per_device_per_request + ) + + logger.debug(f"available_memory={available_memory_gb}(GB) parameter_memory_per_device={parameter_memory_per_device / (1024**3)}(GB) memory_for_kv_cache={memory_for_kv_cache / (1024**3)} GB kv_cache_memory_per_device_per_request={kv_cache_memory_per_device_per_request / (1024**3)}(GB) number_of_requests={number_of_requests}") - return number_of_requests + assert ( + number_of_requests > 0 + ), "Not enough memory to store even a single request" + + return number_of_requests def get_max_request_slots(self) -> int: return self.get_max_batch_size() * self._replica.num_pipeline_stages + + def get_kv_cache_available_memory(self) -> int: + """ + Get actual available memory for KV cache (bytes). + 获取可用于 KV cache 的真实内存大小(字节) + + Formula / 计算公式: + kv_cache_available = GPU_available_memory - model_param_memory + + Returns: + int: Available memory for KV cache (bytes) / 可用于 KV cache 的内存 + """ + # ===== 1. Calculate available GPU memory (bytes) ===== + available_memory_bytes = ( + self._replica.total_memory_gb + * 1024**3 + * (1 - self._replica.memory_margin_fraction) + ) + + # ===== 2. Get model parameter memory ===== + # Previously used prefill_param_mem for all replicas incorrectly, + # causing decode replica KV cache available memory calculation error. 
+ # Now select param memory based on replica type. + # 之前对所有 replica 都使用 prefill_param_mem, + # 导致 decode replica 的 KV cache 可用内存计算错误。 + # 现在根据 replica 类型选择对应的参数内存。 + if self._replica_config.model_name in ['deepseek-671B', 'qwen3-moe-235B', 'qwen3-next-80B']: + from vidur.entities.replica import ReplicaType + _, prefill_param_mem, decode_param_mem = self._get_parameter_memory_per_device() + + if hasattr(self._replica, 'replica_type') and self._replica.replica_type == ReplicaType.DECODE: + # Decode replica uses decode_param_mem + # Decode 副本使用 decode_param_mem + param_memory = decode_param_mem + logger.debug(f"get_kv_cache_available_memory: " + f"Decode replica uses decode_param_mem={decode_param_mem/(1024**3):.2f} GB " + f"(not prefill_param_mem={prefill_param_mem/(1024**3):.2f} GB)") + else: + # Prefill/Mixed replica uses prefill_param_mem + # Prefill/Mixed 副本使用 prefill_param_mem + param_memory = prefill_param_mem + logger.debug(f"get_kv_cache_available_memory: " + f"Prefill replica uses prefill_param_mem={prefill_param_mem/(1024**3):.2f} GB") + else: + param_memory = self._get_parameter_memory_per_device() + + # ===== 3. Calculate available KV cache memory ===== + kv_cache_available = available_memory_bytes - param_memory + + # Ensure non-negative / 确保非负 + if kv_cache_available < 0: + logger.warning(f"KV cache available is negative! 
param_mem({param_memory/(1024**3):.2f}GB) > available({available_memory_bytes/(1024**3):.2f}GB)") + assert kv_cache_available >= 0, f"kv_cache_available={kv_cache_available} must be >= 0" + kv_cache_available = 0 + + logger.info(f"[MemoryPlanner] Real KV cache available (真实KV cache可用内存): {kv_cache_available/(1024**3):.2f} GB") + return int(kv_cache_available) diff --git a/vidur-alibabacloud/vidur/simulator.py b/vidur-alibabacloud/vidur/simulator.py index af251876..4b5aede2 100644 --- a/vidur-alibabacloud/vidur/simulator.py +++ b/vidur-alibabacloud/vidur/simulator.py @@ -81,13 +81,15 @@ def run(self) -> None: # 设置系统时间为事件发生的时间 # Set system time to the event occurrence time self._set_time(event._time) - if tmp_pre_debug_time == 0 and event._time > tmp_pre_debug_time : + if tmp_pre_debug_time == 0 and event._time > tmp_pre_debug_time: tmp_pre_debug_time = event._time - elif tmp_pre_debug_time > 0 and tmp_pre_debug_time > event._time: - assert tmp_pre_debug_time <= event._time, f"> debug tmp_pre_debug_time={tmp_pre_debug_time} event._time={event._time}" + elif tmp_pre_debug_time > 0 and tmp_pre_debug_time > event._time: + assert tmp_pre_debug_time <= event._time, ( + f"Event time went backwards: prev={tmp_pre_debug_time} cur={event._time}" + ) - assert event._time >= 0, "> debug" - print(f"> Debug: len(_event_queue){len(self._event_queue)}, event_type={event._event_type} , time={event._time}") + assert event._time >= 0, "Event time must be non-negative" + logger.debug(f"len(_event_queue){len(self._event_queue)}, event_type={event._event_type}, time={event._time}") # 处理事件,事件可能会触发新的事件 # Handle the event, events may trigger new events @@ -106,6 +108,14 @@ def run(self) -> None: assert self._scheduler.is_empty() or self._terminate logger.info(f"Simulation ended at: {self._time}s") + + # [AICB优化] 模拟结束时打印缓存统计并保存查表 + try: + from vidur.entities.execution_time import _GLOBAL_AICB_CACHE + _GLOBAL_AICB_CACHE.print_stats() + _GLOBAL_AICB_CACHE.save_lookup_table() + except Exception 
as e: + print(f"[WARNING] Cannot print AICB cache stats (无法打印AICB缓存统计): {e}") def _write_output(self) -> None: logger.info("Writing output") @@ -136,7 +146,7 @@ def _init_event_queue(self) -> None: # 生成请求,把请求加入到时间队列中 # Generate requests and add them to the time queue for request in requests: - print(f"> Debug: arrived_at={request.arrived_at} 从 simulator的_init_event_queue() 生成 1个 RequestArrivalEvent, request_id={request._id}") + logger.debug(f"arrived_at={request.arrived_at} RequestArrivalEvent generated, request_id={request._id}") self._add_event(RequestArrivalEvent(request.arrived_at, request)) def _set_time(self, time: float) -> None: diff --git a/vidur-alibabacloud/vidur/types/device_sku_type.py b/vidur-alibabacloud/vidur/types/device_sku_type.py index f6f98b69..70696e1e 100644 --- a/vidur-alibabacloud/vidur/types/device_sku_type.py +++ b/vidur-alibabacloud/vidur/types/device_sku_type.py @@ -6,3 +6,6 @@ class DeviceSKUType(BaseIntEnum): A100 = 2 H100 = 3 H800 = 4 + H20 = 5 + H200 = 6 + GB200 = 7 diff --git a/vidur-alibabacloud/vidur/types/node_sku_type.py b/vidur-alibabacloud/vidur/types/node_sku_type.py index 19df26b0..8e86682b 100644 --- a/vidur-alibabacloud/vidur/types/node_sku_type.py +++ b/vidur-alibabacloud/vidur/types/node_sku_type.py @@ -8,3 +8,4 @@ class NodeSKUType(BaseIntEnum): A100_DGX = 4 H100_DGX = 5 H800_DGX = 6 + H20_DGX = 7 diff --git a/vidur-alibabacloud/vidur/utils/mfu_calculator.py b/vidur-alibabacloud/vidur/utils/mfu_calculator.py index fecab53a..8f3d8b13 100644 --- a/vidur-alibabacloud/vidur/utils/mfu_calculator.py +++ b/vidur-alibabacloud/vidur/utils/mfu_calculator.py @@ -1,13 +1,59 @@ from vidur.config import ReplicaConfig from vidur.entities import BatchStage +from vidur.entities.request import RequestType from vidur.utils.param_counter import ParamCounter +# MoE模型列表:这些模型需要区分 prefill/decode 的参数量 +# MoE model list: These models need separate prefill/decode parameter counts +MOE_MODELS_WITH_PD_SEPARATION = ['deepseek-671B', 
'qwen3-moe-235B', 'qwen3-next-80B'] + + class MFUCalculator: + """ + MFU (Model FLOPs Utilization) 计算器 + 计算模型计算效率,支持 prefill/decode 分离场景 + + MFU (Model FLOPs Utilization) Calculator + Calculates model compute efficiency, supports prefill/decode separation scenarios + """ def __init__(self, replica_config: ReplicaConfig): + self._replica_config = replica_config + self._model_name = replica_config.model_name + + # 判断是否是需要区分 prefill/decode 的 MoE 模型 + # Determine if this is a MoE model requiring prefill/decode separation + self._is_pd_separated_model = self._model_name in MOE_MODELS_WITH_PD_SEPARATION + param_counter = ParamCounter(replica_config) - self._num_params_per_device = param_counter.get_num_parameters_per_device() + + # 根据模型类型获取参数量 + # Get parameter counts based on model type + if self._is_pd_separated_model: + # MoE模型:返回三元组 (total, prefill, decode) + # MoE model: returns tuple (total, prefill, decode) + params_result = param_counter.get_num_parameters_per_device() + self._num_params_per_device = params_result[0] # 总参数量 | Total params + self._prefill_num_params_per_device = params_result[1] # Prefill参数量 | Prefill params + self._decode_num_params_per_device = params_result[2] # Decode参数量 | Decode params + + # 打印重要信息便于检查 | Print important info for verification + print(f"[MFUCalculator] MoE model PD separation mode (MoE模型 PD分离模式)") + print(f"[MFUCalculator] model_name={self._model_name}") + print(f"[MFUCalculator] num_params_per_device (total)={self._num_params_per_device / 1024 / 1024 / 1024:.4f} GB") + print(f"[MFUCalculator] prefill_num_params_per_device={self._prefill_num_params_per_device / 1024 / 1024 / 1024:.4f} GB") + print(f"[MFUCalculator] decode_num_params_per_device={self._decode_num_params_per_device / 1024 / 1024 / 1024:.4f} GB") + else: + # 普通模型:返回单个值 + # Normal model: returns single value + self._num_params_per_device = param_counter.get_num_parameters_per_device() + self._prefill_num_params_per_device = self._num_params_per_device + 
self._decode_num_params_per_device = self._num_params_per_device + + print(f"[MFUCalculator] Normal model mode (普通模型模式)") + print(f"[MFUCalculator] model_name={self._model_name}") + print(f"[MFUCalculator] num_params_per_device={self._num_params_per_device}") model_config = replica_config.model_config @@ -20,9 +66,45 @@ def __init__(self, replica_config: ReplicaConfig): self._head_dimension = model_config.embedding_dim // model_config.num_q_heads self._device_flops = replica_config.device_config.fp16_tflops * 2**40 + def _get_batch_stage_type(self, batch_stage: BatchStage) -> RequestType: + """ + 获取 batch_stage 的类型(prefill 或 decode) + 通过检查第一个 request 的类型来判断 + + Get batch_stage type (prefill or decode) + Determined by checking the first request's type + """ + if not batch_stage.requests: + return RequestType.MIXED + # 假设同一个 batch_stage 中所有 request 类型相同 + # Assume all requests in the same batch_stage have the same type + return batch_stage.requests[0].request_type + def _get_mlp_flops(self, batch_stage: BatchStage) -> float: + """ + 计算 MLP 层的 FLOPs + 根据 batch_stage 类型选择对应的参数量 + + Calculate MLP layer FLOPs + Select corresponding parameter count based on batch_stage type + """ num_tokens = sum(batch_stage.num_tokens) - return 2 * num_tokens * self._num_params_per_device + + # 对于 MoE 模型,根据 stage 类型选择参数量 + # For MoE models, select parameter count based on stage type + if self._is_pd_separated_model: + stage_type = self._get_batch_stage_type(batch_stage) + if stage_type == RequestType.PREFILL: + params = self._prefill_num_params_per_device + elif stage_type == RequestType.DECODE: + params = self._decode_num_params_per_device + else: + # MIXED 类型使用总参数量 | MIXED type uses total params + params = self._num_params_per_device + else: + params = self._num_params_per_device + + return 2 * num_tokens * params def _get_attention_flops(self, batch_stage: BatchStage) -> float: total_flops = 0 @@ -42,5 +124,12 @@ def get_mfu(self, batch_stage: BatchStage) -> float: mlp_flops = 
self._get_mlp_flops(batch_stage) attention_flops = self._get_attention_flops(batch_stage) total_flops = mlp_flops + attention_flops + + # 防止除零错误:如果execution_time为0,返回0 + # Prevent division by zero: return 0 if execution_time is 0 + if batch_stage.execution_time == 0: + print(f"[WARNING] batch_stage.execution_time is 0, returning MFU as 0") + return 0.0 + total_flops_per_second = total_flops / batch_stage.execution_time return total_flops_per_second * 100 / self._device_flops diff --git a/vidur-alibabacloud/vidur/utils/param_counter.py b/vidur-alibabacloud/vidur/utils/param_counter.py index 5ef348f0..af678868 100644 --- a/vidur-alibabacloud/vidur/utils/param_counter.py +++ b/vidur-alibabacloud/vidur/utils/param_counter.py @@ -1,12 +1,18 @@ from math import ceil from vidur.config import ReplicaConfig +from vidur.logger import init_logger +import os +import json + +logger = init_logger(__name__) class ParamCounter: def __init__(self, replica_config: ReplicaConfig) -> None: self._replica_config = replica_config self._model_config = self._replica_config.model_config + self.config = self._model_config assert ( self._model_config.num_q_heads % self._replica_config.tensor_parallel_size @@ -34,6 +40,114 @@ def __init__(self, replica_config: ReplicaConfig) -> None: self._kv_heads_per_tensor_parallel_worker = ceil( self._model_config.num_kv_heads / self._replica_config.tensor_parallel_size ) + + # TODO(tianhao909): support FP8 precision quantization + # TODO(tianhao909): 支持 FP8 精度量化 + if self._replica_config.pd_p2p_comm_dtype == "fp8": + logger.debug(f"FP8 enabled, dtype={self._replica_config.pd_p2p_comm_dtype}") + self.use_fp8 = True + else: + logger.debug(f"FP8 disabled, dtype={self._replica_config.pd_p2p_comm_dtype}") + self.use_fp8 = False + self.tp = self._replica_config.tensor_parallel_size + self.ep = self._replica_config.expert_model_parallel_size + + # 标记是否已经打印过调试信息 | Flag to track if debug info has been printed + self._debug_printed = False + + if 
self._replica_config.model_name in ['deepseek-671B', 'qwen3-moe-235B', 'qwen3-next-80B']: + self.model_config_postprocessing() + # self._model_config + + def model_config_postprocessing(self, ): + # 初始化配置字典 | Initialize configuration dictionary + d = dict() + if self._replica_config.model_name == 'deepseek-671B': + # 使用相对路径定位配置文件 | Use relative path to locate config file + config_path = os.path.join(os.path.dirname(__file__), "..", "..", "data", "hf_configs", "deepseek_v3_config.json") + elif self._replica_config.model_name == 'qwen3-moe-235B': + # TODO(tianhao909): add corresponding JSON config file for Qwen3-MoE + # TODO(tianhao909): 增加对应的 JSON 配置文件 + config_path = os.path.join(os.path.dirname(__file__), "..", "..", "data", "hf_configs", "qwen3_moe_config.json") + elif self._replica_config.model_name == 'qwen3-next-80B': + config_path = os.path.join(os.path.dirname(__file__), "..", "..", "data", "hf_configs", "qwen3-next-80B-A3B_config.json") + logger.debug(f"config_path={config_path}") + # 检查配置文件是否存在 | Check if config file exists + if not os.path.exists(config_path): + logger.warning(f"Config file {config_path} not found, using default config") + return + # 以只读模式加载JSON配置 | Load JSON config in read-only mode + with open(config_path, "r") as f: + d = json.load(f) + + # 模型隐藏层维度 | Model hidden size dimension + self._model_config.hidden_size = d["hidden_size"] + # 隐藏层数量 | Number of hidden layers + self._model_config.num_hidden_layers = d["num_hidden_layers"] + + # 判断是否使用混合注意力机制(全注意力与线性注意力交替) + # Determine if using hybrid attention (alternating full and linear attention) + self._model_config.is_hybrid_linear = d.get("full_attention_interval") is not None + if self._model_config.is_hybrid_linear: + # 全注意力层数量:每隔N层插入一次 | Full attention layers: inserted every N layers + self._model_config.num_full_attn_layers = ( + self._model_config.num_hidden_layers // d["full_attention_interval"] + ) + # 线性注意力层数量:总层数减去全注意力层数 | Linear attention layers: total - full attention + 
self._model_config.num_linear_attn_layers = ( + self._model_config.num_hidden_layers - self._model_config.num_full_attn_layers + ) + # 线性注意力卷积核维度 | Linear attention convolution kernel dimension + self._model_config.linear_conv_kernel_dim = d["linear_conv_kernel_dim"] + # 线性注意力键向量头维度 | Linear attention key head dimension + self._model_config.linear_key_head_dim = d["linear_key_head_dim"] + # 线性注意力键向量头数 | Linear attention number of key heads + self._model_config.linear_num_key_heads = d["linear_num_key_heads"] + # 线性注意力值向量头维度 | Linear attention value head dimension + self._model_config.linear_value_head_dim = d["linear_value_head_dim"] + # 线性注意力值向量头数 | Linear attention number of value heads + self._model_config.linear_num_value_heads = d["linear_num_value_heads"] + + self._model_config.attn_type = "MHA/GQA" # Default attention: MHA or GQA / 默认注意力类型 + if "kv_lora_rank" in d: # If kv_lora_rank present, use MLA attention / 如果配置中包含 kv_lora_rank,则使用MLA + self._model_config.attn_type = "MLA" + + # Attention mechanism parameter setup / 注意力机制相关参数设置 + if self._model_config.attn_type == "MHA/GQA": # MHA/GQA type + self._model_config.num_attention_heads = d["num_attention_heads"] # Number of attention heads / 注意力头数量 + self._model_config.num_key_value_heads = d["num_key_value_heads"] # KV heads for GQA / 键和值的头数量 + if "head_dim" in d: # If head_dim specified in config + self._model_config.head_dim = d["head_dim"] + else: + self._model_config.head_dim = self._model_config.hidden_size // self._model_config.num_attention_heads # Compute from hidden_size / heads + elif self._model_config.attn_type == "MLA": # MLA type + self._model_config.q_lora_rank = d["q_lora_rank"] # Query LoRA rank / 查询向量LoRA的秩 + self._model_config.qk_nope_head_dim = d["qk_nope_head_dim"] # QK no-position head dim / 无位置编码的QK头维度 + self._model_config.qk_rope_head_dim = d["qk_rope_head_dim"] # QK RoPE head dim / 使用RoPE编码的QK头维度 + self._model_config.kv_lora_rank = d["kv_lora_rank"] # KV LoRA rank / 键值对LoRA的秩 + 
self._model_config.num_attention_heads = d["num_attention_heads"] # Total attention heads / 注意力头总数 + self._model_config.v_head_dim = d["v_head_dim"] # Value head dim / 值向量每个头的维度 + self._model_config.qk_head_dim = self._model_config.qk_nope_head_dim + self._model_config.qk_rope_head_dim # QK total head dim = nope + rope + + # FFN/MoE (Feed-Forward Network / Mixture of Experts) configuration + # FFN/MoE(前馈网络/专家混合模型)配置 + self._model_config.is_moe = True # Default enable MoE / 默认启用MoE + if "num_routed_experts" in d: # Routed expert count / 路由专家数量 + self._model_config.num_routed_experts = d["num_routed_experts"] + elif "num_experts" in d: # Fallback to num_experts field + self._model_config.num_routed_experts = d["num_experts"] + else: + self._model_config.is_moe = False # No MoE if no expert fields / 不使用MoE + self._model_config.num_routed_experts = 1 # Single expert (standard FFN) / 单一专家 + + if self._model_config.is_moe: # If MoE enabled / 如果启用了MoE + self._model_config.num_experts_per_tok = d["num_experts_per_tok"] # Experts activated per token / 每个token激活的专家数 + self._model_config.intermediate_size = d["moe_intermediate_size"] # Per-expert intermediate size / 每个专家的中间层大小 + self._model_config.num_shared_experts = d.get("num_shared_experts", 0) # Shared expert count / 共享专家数量 + else: # Standard FFN (no MoE) / 未启用MoE + self._model_config.num_experts_per_tok = 1 # Single "expert" / 标准FFN + self._model_config.intermediate_size = d["intermediate_size"] # Standard FFN intermediate size / 标准FFN中间层大小 + self._model_config.num_shared_experts = 0 # No shared experts / 无共享专家 def get_num_parameters_per_layer(self) -> int: num_parameters = 0 @@ -69,7 +183,346 @@ def get_num_parameters_per_layer(self) -> int: ) return num_parameters + + # Layer Dimension + # First 3 layers are dense (no gate). 
Based on the above calculation, + # each of the first 3 layers of DeepSeek V3 has parameter count: + # Layer 维度 + # 前 3 层是 dense,没有 gate,基于上面的计算,DeepSeek V3 前 3 层每层的参数量是: + # (单层MLA中Q的LoRA参数量48,760,320 + 单层MLA中KV的LoRA参数量20,906,496 + 单层 MLA中WO的参数量117,440,512 + (pre+post)attention layernorm的参数14336(即7168+7168)) + (每个专家的参数量44,040,192 * 9 (9 因为前 3 层 dense,每层固定激活8个路由专家和一个共享专家)) + # (48,760,320 + 20,906,496 + 117,440,512 + 14336) + (44,040,192 * 9) = 583,483,392 + # Last 58 layers are MoE sparse-activated experts. DeepSeek V3 per-layer params: + # 后 58 层是 MoE 稀疏激活专家,基于上面的计算,DeepSeek V3 后 58 层每层的参数量是: + # (48,760,320 + 20,906,496 + 117,440,512 + 14336) + (44,040,192 * 257 + 1,835,264) = 11,507,286,272 + # (单层MLA中Q的LoRA参数量48,760,320 + 单层MLA中KV的LoRA参数量20,906,496 + 单层 MLA中WO的参数量117,440,512 + (pre+post)attention layernorm的参数14336(即7168+7168)) + (每个专家的参数量44,040,192 * 257 (256个路由专家和一个共享专家) + 路由 Gate 的参数量1,835,264) + def get_num_parameters_per_layer_by_layer_id(self, layer_id: int = 0) -> tuple: + """ + Get parameter count per layer by layer_id. 
+ Returns tuple: (params_per_layer, prefill_params_per_layer, decode_params_per_layer) + + 根据 layer_id 获取每层的参数量 + 返回三元组: (params_per_layer, prefill_params_per_layer, decode_params_per_layer) + """ + # 初始化变量 | Initialize variables + params_per_layer_per_gpu = 0 + prefill_params_per_layer_per_gpu = 0 + decode_params_per_layer_per_gpu = 0 + + if self._replica_config.model_name == 'deepseek-671B': + # 仅在首次调用时打印调试信息 | Only print debug info on first call + if not self._debug_printed: + logger.info("{s:{c}^{n}}".format(s="[ParamCounter] DeepSeek-671B Model Weights", n=60, c="-")) + attn_params_bytes = self.get_attn_params_size(self._model_config, self.use_fp8) + expert_params_bytes = self.get_expert_params_size(self._model_config, self.use_fp8) + logger.info(f"[ParamCounter] One MLA params size (MB): {attn_params_bytes / 1024 / 1024:.2f}") + logger.info(f"[ParamCounter] One expert params size (MB): {expert_params_bytes / 1024 / 1024:.2f}") + logger.info(f"[ParamCounter] use_fp8={self.use_fp8}, tp={self.tp}, ep={self.ep}") + self._debug_printed = True + + if layer_id >= 0 and layer_id <= 2: + # 前 3 层是 dense,每层固定激活8个路由专家和1个共享专家 + # First 3 layers are dense, each layer activates 8 routed experts + 1 shared expert + params_per_layer_per_gpu = (self.get_mla_params_size(self._model_config, self.use_fp8)/self.tp + + self.get_expert_params_size(self._model_config, self.use_fp8) * (8 + 1) / self.tp) + prefill_params_per_layer_per_gpu = params_per_layer_per_gpu + decode_params_per_layer_per_gpu = params_per_layer_per_gpu + + elif layer_id >= 3 and layer_id <= 60: + # 后 58 层是 MoE 稀疏激活专家 + # Remaining 58 layers are MoE sparse activated experts + mla_params = self.get_mla_params_size(self._model_config, self.use_fp8) / self.tp + expert_params = self.get_expert_params_size(self._model_config, self.use_fp8) + + params_per_layer_per_gpu = mla_params + expert_params * (256/self.ep + 1) + prefill_params_per_layer_per_gpu = mla_params + expert_params * 
(256/self._replica_config.prefill_world_size + 1) + decode_params_per_layer_per_gpu = mla_params + expert_params * (256/self._replica_config.decode_world_size + 1) + + elif self._replica_config.model_name == 'qwen3-next-80B': + # 仅在首次调用时打印调试信息 | Only print debug info on first call + if not self._debug_printed: + logger.info("{s:{c}^{n}}".format(s="[ParamCounter] Qwen3-Next-80B Model Weights", n=60, c="-")) + full_attn_params_bytes = self.get_attn_params_size(self._model_config, self.use_fp8) + linear_attn_params_bytes = self.get_linear_attn_params_size(self._model_config, self.use_fp8) + expert_params_bytes = self.get_expert_params_size(self._model_config, self.use_fp8) + logger.info(f"[ParamCounter] One full attn params size (MB): {full_attn_params_bytes / 1024 / 1024:.2f}") + logger.info(f"[ParamCounter] One linear attn params size (MB): {linear_attn_params_bytes / 1024 / 1024:.2f}") + logger.info(f"[ParamCounter] One expert params size (MB): {expert_params_bytes / 1024 / 1024:.2f}") + logger.info(f"[ParamCounter] use_fp8={self.use_fp8}, tp={self.tp}, ep={self.ep}") + self._debug_printed = True + + full_attn_params_bytes = self.get_attn_params_size(self._model_config, self.use_fp8) + linear_attn_params_bytes = self.get_linear_attn_params_size(self._model_config, self.use_fp8) + expert_params_bytes = self.get_expert_params_size(self._model_config, self.use_fp8) + + # 基础参数: 专家网络部分 | Base params: expert network part + params_per_layer_per_gpu = expert_params_bytes * ( + self.config.num_shared_experts + self.config.num_routed_experts / self._replica_config.world_size + ) + prefill_params_per_layer_per_gpu = expert_params_bytes * ( + self.config.num_shared_experts + self.config.num_routed_experts / self._replica_config.prefill_world_size + ) + decode_params_per_layer_per_gpu = expert_params_bytes * ( + self.config.num_shared_experts + self.config.num_routed_experts / self._replica_config.decode_world_size + ) + + # 根据 layer_id 添加注意力层参数 | Add attention layer params 
based on layer_id + if layer_id % 4 == 3: # Full attention layers (e.g., layer 3, 7, 11...) + params_per_layer_per_gpu += full_attn_params_bytes / self.tp + prefill_params_per_layer_per_gpu += full_attn_params_bytes / self.tp + decode_params_per_layer_per_gpu += full_attn_params_bytes / self.tp + else: # Linear attention layers (e.g., layer 0, 1, 2, 4, 5, 6...) + params_per_layer_per_gpu += linear_attn_params_bytes / self.tp + prefill_params_per_layer_per_gpu += linear_attn_params_bytes / self.tp + decode_params_per_layer_per_gpu += linear_attn_params_bytes / self.tp + + elif self._replica_config.model_name == 'qwen3-moe-235B': + # 仅在首次调用时打印调试信息 | Only print debug info on first call + if not self._debug_printed: + logger.info("{s:{c}^{n}}".format(s="[ParamCounter] Qwen3-MoE-235B Model Weights", n=60, c="-")) + attn_params_bytes = self.get_mha_params_size(self._model_config, self.use_fp8) + expert_params_bytes = self.get_expert_params_size(self._model_config, self.use_fp8) + logger.info(f"[ParamCounter] One MHA params size (MB): {attn_params_bytes / 1024 / 1024:.2f}") + logger.info(f"[ParamCounter] One expert params size (MB): {expert_params_bytes / 1024 / 1024:.2f}") + logger.info(f"[ParamCounter] use_fp8={self.use_fp8}, tp={self.tp}, ep={self.ep}") + self._debug_printed = True + + # Qwen3-MoE-235B: 128个路由专家, 0个共享专家, MHA/GQA注意力, 没有dense层 + # Qwen3-MoE-235B: 128 routed experts, 0 shared experts, MHA/GQA attention, no dense layers + mha_params = self.get_mha_params_size(self._model_config, self.use_fp8) + expert_params = self.get_expert_params_size(self._model_config, self.use_fp8) + + params_per_layer_per_gpu = mha_params + expert_params * 128 + prefill_params_per_layer_per_gpu = mha_params/self.tp + expert_params * (128/self._replica_config.prefill_world_size) + decode_params_per_layer_per_gpu = mha_params/self.tp + expert_params * (128/self._replica_config.decode_world_size) + + return params_per_layer_per_gpu, prefill_params_per_layer_per_gpu, 
def get_num_parameters_per_device(self) -> "int | tuple[float, float, float]":
    """Return the parameter footprint held by one device for its pipeline stage.

    For the MoE models listed below, the per-layer sizes reported by
    ``get_num_parameters_per_layer_by_layer_id`` are summed over the layers
    of this pipeline stage and returned as a 3-tuple
    ``(params, prefill_params, decode_params)``. The totals are treated as
    bytes here (they are divided by 1024**3 to log GB).

    Every other model keeps the legacy scalar result:
    ``get_num_parameters_per_layer() * layers_per_pipeline_stage``.

    NOTE(review): the two branches return different shapes (tuple vs.
    scalar), so callers have to branch on the model name the same way.
    """
    # TODO(tianhao909): refactor per-layer param calculation with layer_id support
    if self._replica_config.model_name in ['deepseek-671B', 'qwen3-moe-235B', 'qwen3-next-80B']:
        # Mirrors ExecutionTime._get_block_execution_time_by_layer_id: work out
        # the first and last layer id owned by the current pipeline stage.
        # Falls back to stage 0 when the instance carries no _pipeline_stage_id.
        pipeline_stage_id = getattr(self, '_pipeline_stage_id', 0)
        start_layer = pipeline_stage_id * self._num_layers_per_pipeline_stage
        end_layer = start_layer + self._num_layers_per_pipeline_stage
        logger.debug(f"pipeline_stage_id={pipeline_stage_id} num_layers_per_pipeline_stage={self._num_layers_per_pipeline_stage} start_layer={start_layer} end_layer={end_layer}")

        params_per_gpu = 0
        prefill_params_per_gpu = 0
        decode_params_per_gpu = 0
        for layer_id in range(start_layer, end_layer):
            params_per_layer, prefill_params_per_layer, decode_params_per_layer = self.get_num_parameters_per_layer_by_layer_id(layer_id)
            params_per_gpu += params_per_layer
            prefill_params_per_gpu += prefill_params_per_layer
            decode_params_per_gpu += decode_params_per_layer

        # The accumulated totals are in bytes; convert to GB for logging only.
        params_per_gpu_gb = params_per_gpu / 1024 / 1024 / 1024
        prefill_params_per_gpu_gb = prefill_params_per_gpu / 1024 / 1024 / 1024
        decode_params_per_gpu_gb = decode_params_per_gpu / 1024 / 1024 / 1024
        logger.info("{:<40} {:<10.2f}".format("Per GPU params size (GB):", params_per_gpu_gb))
        logger.info("{:<40} {:<10.2f}".format("Prefill Per GPU params size (GB):", prefill_params_per_gpu_gb))
        logger.info("{:<40} {:<10.2f}".format("Decode Per GPU params size (GB) :", decode_params_per_gpu_gb))
        logger.info(f"Prefill: tp={self.tp} dp={self._replica_config._num_prefill_replicas} ep={self._replica_config.prefill_world_size} prefill_params_per_gpu_gb={prefill_params_per_gpu_gb} (GB)")
        logger.info(f"Decode: tp={self.tp} dp={self._replica_config._num_decode_replicas} ep={self._replica_config.decode_world_size} decode_params_per_gpu_gb={decode_params_per_gpu_gb} (GB)")
        assert self._replica_config._num_prefill_replicas % 1 == 0 and self._replica_config._num_decode_replicas % 1 == 0, "Prefill and Decode replicas must be integer"

        # Tuple result: (total params, prefill params, decode params).
        return params_per_gpu, prefill_params_per_gpu, decode_params_per_gpu
    else:
        num_parameters_per_layer = self.get_num_parameters_per_layer()
        return num_parameters_per_layer * self._num_layers_per_pipeline_stage


# Reference: /InferSim/params/params.py
def get_mha_params_size(self, config, use_fp8):
    """Return the byte size of one MHA/GQA attention layer's projections.

    Counts the Q, K, V and output projection weights. With ``use_fp8`` each
    parameter is counted as 1 byte, otherwise as 2 bytes.
    """
    wq = config.hidden_size * config.num_attention_heads * config.head_dim  # Q: hidden * heads * head_dim
    wk = config.hidden_size * config.num_key_value_heads * config.head_dim  # K: hidden * kv_heads * head_dim
    wv = config.hidden_size * config.num_key_value_heads * config.head_dim  # V: hidden * kv_heads * head_dim
    wo = config.hidden_size * config.num_attention_heads * config.head_dim  # O: hidden * heads * head_dim
    total = wq + wk + wv + wo
    if use_fp8:
        return total  # 1 byte per parameter
    return 2 * total  # 2 bytes per parameter


# MLA (suited for DeepSeek).
# DeepSeek V3 parameter derivation references:
#   https://zhuanlan.zhihu.com/p/21455638257
#   https://yangwenbo.com/articles/deepseek-v3-parameter-size.html
# Reference config values:
#   "hidden_size": 7168,
#   "num_key_value_heads": 128,
#   "v_head_dim": 128,
#   "kv_lora_rank": 512,
#   "num_attention_heads": 128,
#   "q_lora_rank": 1536,
#   "qk_nope_head_dim": 128,
#   "qk_rope_head_dim": 64,
#   "num_hidden_layers": 61,
def get_mla_params_size(self, config, use_fp8):
    """Return the byte size of one MLA attention layer's projections.

    Sums the Q down/up LoRA projections, the KV down/up LoRA projections and
    the output projection. With ``use_fp8`` each parameter is counted as
    1 byte, otherwise as 2 bytes.
    """
    # Q LoRA, per the cited derivation:
    #   7168 * 1536 + 1536 + 1536 * 128 * (128 + 64) = 48,760,320
    #   = wq_down + (q_lora_rank norm term) + wq_up
    wq_down = config.hidden_size * config.q_lora_rank  # Q down-projection
    wq_up = config.q_lora_rank * config.num_attention_heads * config.qk_head_dim  # Q up-projection
    # KV LoRA, per the cited derivation:
    #   7168 * (512 + 64) + 512 + 512 * 128 * (128 + 128) = 20,906,496
    # TODO(tianhao909): clarify what the standalone 512 constant represents.
    # NOTE(review): the code below uses hidden_size * kv_lora_rank only, i.e.
    # it leaves out the qk_rope_head_dim columns and the standalone
    # 1536 / 512 terms that appear in the derivation above - confirm this
    # simplification is intended.
    wkv_down = config.hidden_size * config.kv_lora_rank  # KV down-projection
    wkv_up = (  # KV up-projection
        config.kv_lora_rank
        * config.num_attention_heads
        * (config.qk_nope_head_dim + config.v_head_dim)
    )
    # Output projection: 128 * 128 * 7168 = 117,440,512
    wo = config.hidden_size * config.num_attention_heads * config.v_head_dim
    total = wq_down + wq_up + wkv_down + wkv_up + wo
    if use_fp8:
        return total  # 1 byte per parameter
    return 2 * total  # 2 bytes per parameter

# Additionally: pre + post attention layernorm params = 7168 * 2 = 14,336, so
# the DeepSeek V3 MLA total across 61 layers is
# (48,760,320 + 20,906,496 + 117,440,512 + 14,336) * 61 = 11,414,421,504 (~11B)


def get_gdn_params_size(self, config, use_fp8):
    """Return the byte size of one linear-attention layer's weights.

    The primary projections (q, k, v, z, a, b) are counted as 1 byte per
    parameter with ``use_fp8`` and 2 bytes otherwise; the convolution kernel
    term is added undoubled in both modes.
    """
    wq = config.hidden_size * config.linear_num_key_heads * config.linear_key_head_dim
    wk = wq  # K has the same shape as Q
    wv = (
        config.hidden_size
        * config.linear_num_value_heads
        * config.linear_value_head_dim
    )
    wz = wv  # Z has the same shape as V
    wa = config.hidden_size * config.linear_num_value_heads  # A gate
    wb = wa  # B gate has the same shape as A
    s = wq + wk + wv + wz + wa + wb

    # Convolution kernels: two key-sized parts plus one value-sized part.
    key_conv = (
        config.linear_num_key_heads
        * config.linear_key_head_dim
        * config.linear_conv_kernel_dim
    )
    value_conv = (
        config.linear_num_value_heads
        * config.linear_value_head_dim
        * config.linear_conv_kernel_dim
    )
    wconv = key_conv + key_conv + value_conv
    if use_fp8:
        return s + wconv
    return 2 * s + wconv  # only the primary projections are doubled


def get_attn_params_size(self, config, use_fp8):
    """Dispatch to the attention size helper selected by ``config.attn_type``.

    Raises:
        ValueError: if ``config.attn_type`` is neither "MHA/GQA" nor "MLA".
    """
    if config.attn_type == "MHA/GQA":
        return self.get_mha_params_size(config, use_fp8)
    if config.attn_type == "MLA":
        return self.get_mla_params_size(config, use_fp8)
    # Fail loudly rather than returning None, which would only surface later
    # as a TypeError in the callers' arithmetic.
    raise ValueError(f"Unsupported attn_type: {config.attn_type!r}")


def get_linear_attn_params_size(self, config, use_fp8):
    """Return the linear-attention layer size (delegates to the GDN helper)."""
    return self.get_gdn_params_size(config, use_fp8)

# MoE (suited for DeepSeek), reference config values:
# "num_hidden_layers": 61,
# "hidden_size": 7168,
# "moe_intermediate_size": 2048, // Routed expert MLP intermediate dim
# "n_shared_experts": 1, // Shared expert count
# "n_routed_experts": 256, // Routed expert count
# "first_k_dense_replace": 3, // First K layers use dense
#   instead of MoE
# "intermediate_size": 18432, // First 3 layers (9 * moe_intermediate_size)
#
# Per-expert params:
#   7168 * 2048 * 3 = 44,040,192
#   config.hidden_size * config.moe_intermediate_size * 3
# Router gate params:
#   256 * 7168 + 256 = 1,835,264
# First 3 dense layers (8 routed + 1 shared per layer):
#   44,040,192 * 9 * 3 = 1,189,085,184
# Last 58 sparse layers (dynamically activate 8 routed):
#   (44,040,192 * 257 + 1,835,264) * 58 = 656,569,547,264
# DeepSeek V3 MoE total params:
#   1,189,085,184 + 656,569,547,264 = 657,758,632,448 (~657B)
# Active params per forward (1 shared + 8 routed):
#   44,040,192 * 9 * 61 + 1,835,264 * 58 = 24,284,510,720 (~24B)
def get_expert_params_size(self, config, use_fp8):
    """Return the byte size of a single expert FFN (three weight matrices).

    For 'qwen3-moe-235B' the FFN width is ``config.moe_intermediate_size``;
    for every other model it is ``config.intermediate_size``. With
    ``use_fp8`` each parameter is counted as 1 byte, otherwise as 2 bytes.
    """
    # Read the MoE width into a local instead of assigning it onto ``config``,
    # so a size query never rewrites the shared model configuration.
    if self._replica_config.model_name in ['qwen3-moe-235B']:
        ffn_dim = config.moe_intermediate_size
    else:
        ffn_dim = config.intermediate_size
    w = 3 * config.hidden_size * ffn_dim  # W1, W2, W3
    if not use_fp8:
        w *= 2  # 2 bytes per parameter
    return w


def load_attn_weights_time(self, config, use_fp8, gpu):
    """Return the attention weight size in GB divided by ``gpu.mem_bw``.

    The result is a time in seconds provided ``gpu.mem_bw`` is expressed in
    GB/s - presumably the case; confirm against the GPU definition.
    """
    size = self.get_attn_params_size(config, use_fp8)  # bytes
    return size / 1024 / 1024 / 1024 / gpu.mem_bw


def load_moe_weights_time(self, config, use_fp8, gpu, num_gpus):
    """Return the per-GPU routed-expert weight size in GB divided by ``gpu.mem_bw``.

    The single-expert size is scaled by ``config.num_routed_experts / num_gpus``,
    i.e. the routed experts are assumed to be spread evenly over ``num_gpus``.
    """
    size = self.get_expert_params_size(config, use_fp8)  # bytes for one expert
    size *= config.num_routed_experts / num_gpus  # experts resident on one GPU
    return size / 1024 / 1024 / 1024 / gpu.mem_bw