Google
Unsloth

Gemma 4 E4B IT benchmark on an NVIDIA GeForce RTX 2080 Ti

<- Runs

Prompt tokens

40,960

Generation tokens

10,240

Trials passed

10/10

Verified

Decode speed: 93.3 tok/s

Prefill speed: 3,378.0 tok/s

Peak memory

5.08/31 GB

Runs great

Trials

Decode / Prefill Speeds

Metadata

metadata.json
{
"runId": "run_630cf218-3538-4119-bea8-0681e53d1492",
"bundleId": "llamacpp-gemma-4-e4b-it-q4_0.gguf-2b3be4",
"status": "verified",
"promptTokens": 40960,
"completionTokens": 10240,
"contextLength": 5120,
"harness": {
"version": "0.1.21",
"gitSha": "unknown"
},
"runtime": {
"name": "llama.cpp",
"version": "b9110",
"buildFlags": "metal"
},
"model": {
"displayName": "Gemma 4 E4B IT",
"format": "gguf",
"quant": "q4_0",
"architecture": "gemma4",
"source": "unsloth/gemma-4-E4B-it-GGUF:gemma-4-E4B-it-Q4_0.gguf",
"fileSizeBytes": 4836000928,
"lab": {
"name": "Google",
"slug": "google"
},
"quantizedBy": {
"name": "Unsloth",
"slug": "unsloth"
}
},
"device": {
"cpu": "AMD Ryzen 7 2700X Eight-Core Processor",
"cpuCores": 16,
"gpu": "NVIDIA GeForce RTX 2080 Ti",
"gpuCores": 0,
"gpuCount": 1,
"ramGb": 31,
"osName": "Pop!_OS 22.04 LTS",
"osVersion": "22.04"
},
"decodeTpsMean": 93.3,
"prefillTpsMean": 3378,
"ttftP50Ms": 1213.72,
"idleTpsMean": 2959.3,
"peakRssMb": 5203.4,
"trialsPassed": 10,
"trialsTotal": 10,
"runnabilityScore": 0.9041496039746544,
"bundleSha256": "80415ebaa94ed8f5d61ba24763b2b36b24b22b9f90de51460db2ed6fe0c5ab7b",
"createdAt": "2026-05-13T15:05:56.453Z"
}