Benchmarks
Execute prebid-bench Benchmark on SDK dev
# Run a one-off prebid-benchmark pod on SDK dev (namespace: playground).
# Name suffix: 12 random [a-z0-9] chars. Reading /dev/urandom unbounded
# (instead of only 64 bytes) guarantees head -c 12 always gets 12 chars;
# 64 raw bytes survive the tr filter only ~9 chars on average.
kubectl run "prebid-benchmark-$(tr -dc 'a-z0-9' < /dev/urandom | head -c 12)" \
  --image=acrsdk.azurecr.io/prebid-benchmark:latest \
  --image-pull-policy=Always \
  --restart=Never \
  -n playground \
  --env="PBS_BENCH_RETURN_EMPTY_PERCENT=2" \
  --env="PBS_BENCH_DURATION=300" \
  --env="PBS_BENCH_REQUEST_INTERVAL=10" \
  --env="PBS_BENCH_CONNECTIONS=100" \
  --env="PBS_BENCH_TARGET_RPS=100000" \
  --env="PBS_BENCH_HOST_TCP=prebid.sdk-cloud.de:6970" \
  --overrides='{
    "spec": {
      "imagePullSecrets": [
        { "name": "acrsdk-auth" }
      ]
    }
  }'
Execute prebid-bench benchmark on myloc
# Run a one-off prebid-benchmark pod on myloc (namespace: benchmark).
# Name suffix: 12 random [a-z0-9] chars; unbounded urandom read guarantees
# a full 12-char suffix (64 bytes often yield fewer than 12 after tr).
kubectl run "prebid-benchmark-$(tr -dc 'a-z0-9' < /dev/urandom | head -c 12)" \
  --image=acrsdk.azurecr.io/prebid-benchmark:latest \
  --image-pull-policy=Always \
  --restart=Never \
  -n benchmark \
  --env="PBS_BENCH_RETURN_EMPTY_PERCENT=2" \
  --env="PBS_BENCH_DURATION=300" \
  --env="PBS_BENCH_REQUEST_INTERVAL=10" \
  --env="PBS_BENCH_CONNECTIONS=100" \
  --env="PBS_BENCH_TARGET_RPS=50000" \
  --env="PBS_BENCH_HOST_TCP=prebid.sdk-cloud.de:6970" \
  --overrides='{
    "spec": {
      "imagePullSecrets": [
        { "name": "acrsdk-auth" }
      ]
    }
  }'
Execute prebid-bench benchmark on myloc.kube-master
# Run a one-off prebid-benchmark pod pinned to kube-master with host
# networking (namespace: benchmark). Name suffix: 12 random [a-z0-9] chars;
# unbounded urandom read guarantees a full 12-char suffix.
kubectl run "prebid-benchmark-$(tr -dc 'a-z0-9' < /dev/urandom | head -c 12)" \
  --image=acrsdk.azurecr.io/prebid-benchmark:latest \
  --image-pull-policy=Always \
  --restart=Never \
  -n benchmark \
  --env="PBS_BENCH_RETURN_EMPTY_PERCENT=2" \
  --env="PBS_BENCH_DURATION=300" \
  --env="PBS_BENCH_REQUEST_INTERVAL=10" \
  --env="PBS_BENCH_CONNECTIONS=100" \
  --env="PBS_BENCH_TARGET_RPS=100000" \
  --env="PBS_BENCH_HOST_TCP=prebid.sdk-cloud.de:6970" \
  --overrides='{
    "spec": {
      "imagePullSecrets": [
        { "name": "acrsdk-auth" }
      ],
      "nodeSelector": {
        "kubernetes.io/hostname": "kube-master"
      },
      "hostNetwork": true
    }
  }'
Single Node Ingress?
Run on both host machines directly:
# Capture 60 s of inbound TCP traffic for both VIPs on the default-route
# interface, then report per-VIP packet counts. Requires root (tcpdump).
VIP1=5.199.128.26
VIP2=5.199.140.38
PORT=6970
DUR=60
IF=$(ip route | awk '/default/ {print $5; exit}')
TS=$(date +%Y%m%d-%H%M%S)
mkdir -p "/tmp/pbs-ingress-$TS"
cd "/tmp/pbs-ingress-$TS" || exit 1
# One capture per VIP, backgrounded; PIDs kept so we can stop them cleanly.
tcpdump -nn -Q in -i "$IF" -w vip1-in.pcap "dst host $VIP1 and tcp dst port $PORT" >/dev/null 2>&1 & P1=$!
tcpdump -nn -Q in -i "$IF" -w vip2-in.pcap "dst host $VIP2 and tcp dst port $PORT" >/dev/null 2>&1 & P2=$!
sleep "$DUR"
# SIGINT makes tcpdump flush its pcap file before exiting.
kill -INT "$P1" "$P2"
wait "$P1" 2>/dev/null
wait "$P2" 2>/dev/null
C1=$(tcpdump -nn -r vip1-in.pcap 2>/dev/null | wc -l)
C2=$(tcpdump -nn -r vip2-in.pcap 2>/dev/null | wc -l)
echo "HOST=$(hostname) IF=$IF DUR=${DUR}s VIP1=$VIP1 COUNT=$C1 VIP2=$VIP2 COUNT=$C2" | tee summary.txt
The script captures incoming TCP traffic for two specific IP addresses (VIP1, VIP2) on port 6970 for 60 seconds using tcpdump on the system’s default network interface. It saves the captured packets to .pcap files in a timestamped temporary directory. After the capture ends, it counts the number of packets for each VIP and prints a summary containing the host, interface, duration, and packet counts.
CMDs
VM debug shells (read-only)
kube-worker
# Read-only node shells via kubectl debug (ephemeral netshoot pod, chroot
# into the host filesystem). Headings were fused into the commands in the
# original paste (e.g. "chroot /hostkube-master"); split back out here.
kubectl debug node/kube-worker -it --image=nicolaka/netshoot -- chroot /host
kube-master
kubectl debug node/kube-master -it --image=nicolaka/netshoot -- chroot /host
VM debug shells (write) ❗
kube-master
# Privileged debug pod on kube-master with host PID/network and the host
# root filesystem mounted writable at /host. The YAML indentation was
# stripped by the export; restored here to a valid Pod manifest.
cat <<'EOF' > host-debug-kube-master.yaml
apiVersion: v1
kind: Pod
metadata:
  name: host-debug-kube-master
  namespace: kube-system
spec:
  nodeName: kube-master
  hostPID: true
  hostNetwork: true
  tolerations:
    - operator: Exists
  containers:
    - name: debug
      image: nicolaka/netshoot:latest
      command: ["/bin/sh", "-c", "sleep infinity"]
      securityContext:
        privileged: true
        allowPrivilegeEscalation: true
        capabilities:
          add: ["SYS_ADMIN", "NET_ADMIN", "SYS_RESOURCE"]
      volumeMounts:
        - name: host-root
          mountPath: /host
          readOnly: false
  volumes:
    - name: host-root
      hostPath:
        path: /
        type: Directory
  restartPolicy: Never
EOF
kubectl apply -f host-debug-kube-master.yaml
kubectl exec -it -n kube-system host-debug-kube-master -- /bin/sh
kube-worker
# Privileged debug pod on kube-worker with host PID/network and the host
# root filesystem mounted writable at /host. The YAML indentation was
# stripped by the export; restored here to a valid Pod manifest.
cat <<'EOF' > host-debug-kube-worker.yaml
apiVersion: v1
kind: Pod
metadata:
  name: host-debug-kube-worker
  namespace: kube-system
spec:
  nodeName: kube-worker
  hostPID: true
  hostNetwork: true
  tolerations:
    - operator: Exists
  containers:
    - name: debug
      image: nicolaka/netshoot:latest
      command: ["/bin/sh", "-c", "sleep infinity"]
      securityContext:
        privileged: true
        allowPrivilegeEscalation: true
        capabilities:
          add: ["SYS_ADMIN", "NET_ADMIN", "SYS_RESOURCE"]
      volumeMounts:
        - name: host-root
          mountPath: /host
          readOnly: false
  volumes:
    - name: host-root
      hostPath:
        path: /
        type: Directory
  restartPolicy: Never
EOF
kubectl apply -f host-debug-kube-worker.yaml
kubectl exec -it -n kube-system host-debug-kube-worker -- /bin/sh
MISC
# Install sysstat, then sample per-core CPU and ksoftirqd load.
# (The original paste fused "apt install sysstat" with the mpstat command.)
apt update
apt install sysstat
# Per-core CPU utilisation, incl. %soft (softirq share), over 120 s:
mpstat -P ALL 1 120
# CPU usage of the ksoftirqd kernel threads (softirq processing) over 30 s:
pidstat -t -p ALL 1 30 | egrep 'ksoftirqd/[0-9]+'
mpstat -P ALL 1 120 zeigt die CPU-Auslastung pro Core über 120 Sekunden (inkl. SoftIRQ-Anteil), während pidstat -t -p ALL 1 30 | egrep ‘ksoftirqd/[0-9]+’ die CPU-Nutzung der Kernel-Threads ksoftirqd anzeigt, die SoftIRQ-Netzwerkverarbeitung übernehmen.
Network measurements
# TCP reachability check:
nc -vz -w2 5.199.128.26 6969
# Connection to 5.199.128.26 6969 port [tcp/*] succeeded!
TCP RTT
# hping3 -S -p 6969 -c 100 5.199.128.26
# hping3 -S -p 6970 -c 100 5.199.140.38
hping3 -S -p 6970 -c 100 prebid.sdk-cloud.de
hping3 -S -p 6970 -c 100 prebid.sdk-cloud.de
run in myloc cluster
kubectl run netshoot -it --rm --image=nicolaka/netshoot -- bash
# Inside the netshoot pod (Alpine):
apk update
apk add --no-cache hping3
Netzwerklast verteilung auf den VMs herausfinden
Das Skript sammelt über etwa 120 Sekunden Netzwerk- und CPU-Diagnosedaten (IRQ-Verteilung, NIC-Queue-Statistiken, SoftIRQ-Last, ksoftirqd-Aktivität und Softnet-Drops) für ein Interface (ens18), speichert Snapshots vorher/nachher und berechnet anschließend IRQ-Last pro Queue, SoftIRQ-Hotspot-CPUs sowie Paket-Drops/Squeeze-Raten, um Netzwerk-Queue- oder Interrupt-Bottlenecks zu identifizieren.
# Collect ~120 s of NIC/IRQ/CPU diagnostics for one interface into a
# timestamped directory: before/mid/after snapshots of IRQ counters and
# softnet stats, plus continuous mpstat/pidstat logs.
TS=$(date +%F-%H%M%S)
OUT="/tmp/queue-check-$TS"
mkdir -p "$OUT"
IF=ens18
# 1) Baseline snapshots
date | tee "$OUT/start.txt"
ethtool -l "$IF" > "$OUT/ethtool_l.txt"
ethtool -S "$IF" > "$OUT/ethtool_s.before.txt"
grep -Ei 'virtio1-(input|output)' /proc/interrupts > "$OUT/irq.before"
cat /proc/net/softnet_stat > "$OUT/softnet.before"
# 2) CPU + ksoftirqd (120s)
mpstat -P ALL 1 120 > "$OUT/mpstat.log" &
MP=$!
pidstat -t -p ALL 1 120 > "$OUT/pidstat.log" &
PP=$!
# 3) Mid-window IRQ delta (60s)
sleep 60
grep -Ei 'virtio1-(input|output)' /proc/interrupts > "$OUT/irq.mid"
wait "$MP"
wait "$PP"
# 4) End snapshots
cat /proc/net/softnet_stat > "$OUT/softnet.after"
ethtool -S "$IF" > "$OUT/ethtool_s.after.txt"
grep -Ei 'virtio1-(input|output)' /proc/interrupts > "$OUT/irq.after"
date | tee "$OUT/end.txt"
echo "OUT=$OUT"
# Now evaluate:
# Pick the most recent collection directory written by the capture script.
OUT=$(ls -dt /tmp/queue-check-* | head -1)
# A) top softirq cores
# mpstat "Average:" rows: $2=CPU id, $5=%sys, $8=%soft, $12=%idle
# (assumes sysstat's default mpstat column layout — TODO confirm on target box).
awk '$1=="Average:" && $2~/^[0-9]+$/ {print "cpu"$2,"soft="$8,"sys="$5,"idle="$12}' "$OUT/mpstat.log" | sort -t= -k2,2nr | head -10
# B) top ksoftirqd
# pidstat "Average:" rows: $9=%CPU, $11=command; "|__" rows (thread-tree
# children) are skipped. Column positions assume default pidstat -t layout.
awk '$1=="Average:" && $11 ~ /ksoftirqd\// && $11 !~ /^\|__/ {print $11,"cpu="$9}' "$OUT/pidstat.log" | sort -t= -k2,2nr | head -10
# C) IRQ delta per queue (first 60s)
# Two-pass awk: pass 1 (irq.before) sums per-CPU counts (columns 2..29,
# i.e. assumes 28 CPUs — TODO confirm) into baseline b[irq] keyed by IRQ
# number; pass 2 (irq.mid) prints per-queue deltas, highest first.
awk '
NR==FNR{
irq=$1; sub(":","",irq)
t=0; for(i=2;i<=29;i++) t+=$i
b[irq]=t; n[irq]=$NF
next
}
{
irq=$1; sub(":","",irq)
t=0; for(i=2;i<=29;i++) t+=$i
printf "%-18s delta=%10d irq=%s\n",$NF,(t-b[irq]),irq
}
' "$OUT/irq.before" "$OUT/irq.mid" | sort -k2,2nr
# D) softnet pressure (120s)
# /proc/net/softnet_stat: one row per CPU, hex counters; col 1 = packets
# processed, col 2 = drops, col 3 = time_squeeze. Diffs before/after,
# correcting for 32-bit counter wraparound (MOD), then reports totals and
# drop/squeeze rates in parts-per-million of processed packets.
# Needs gawk for strtonum().
gawk '
BEGIN {MOD=4294967296}
NR==FNR {b[FNR]=$0; next}
{
split(b[FNR],x); split($0,y)
p=strtonum("0x" y[1]) - strtonum("0x" x[1]); if (p<0) p+=MOD
d=strtonum("0x" y[2]) - strtonum("0x" x[2]); if (d<0) d+=MOD
s=strtonum("0x" y[3]) - strtonum("0x" x[3]); if (s<0) s+=MOD
tp+=p; td+=d; ts+=s
}
END {
printf "TOTAL processed=%u drop=%u squeeze=%u drop_ppm=%.2f squeeze_ppm=%.2f\n",
tp, td, ts, (tp?td*1e6/tp:0), (tp?ts*1e6/tp:0)
}
' "$OUT/softnet.before" "$OUT/softnet.after"