// web/src/lib/humanness.ts
import type { RawDigraph, SessionMetadata, HumannessResult } from './types';
2
import { aggregateDigraphs } from './aggregation';
3
4
/**
 * Restricts `value` to the closed interval [`min`, `max`].
 *
 * @param value - Number to bound.
 * @param min - Lower bound; defaults to 0.
 * @param max - Upper bound; defaults to 100.
 * @returns `value` when it already lies inside the interval, otherwise the
 *   nearer bound.
 */
function clamp(value: number, min = 0, max = 100): number {
  const cappedAbove = Math.min(max, value);
  return Math.max(min, cappedAbove);
}

/**
 * Timing Variance (weight 0.20)
 * Averages the coefficient of variation (std / mean) of pressPress over the
 * groups returned by `aggregateDigraphs`, then maps that average to 0–100.
 * Humans typically have CV > 0.15, bots < 0.05.
 *
 * Only groups with a positive pressPress mean and at least two samples
 * contribute. An average CV of 0.03 or less scores 0, 0.20 or more scores
 * 100, and values in between are interpolated linearly.
 *
 * @param digraphs - Raw digraph timings for the session.
 * @returns Score in 0–100; 0 when no group qualifies.
 */
function scoreTimingVariance(digraphs: RawDigraph[]): number {
  const BOT_CV = 0.03;
  const HUMAN_CV = 0.20;

  // Running sum/count instead of an intermediate array of CVs.
  let cvTotal = 0;
  let cvCount = 0;
  for (const agg of aggregateDigraphs(digraphs)) {
    if (agg.pressPress.mean > 0 && agg.count >= 2) {
      cvTotal += agg.pressPress.std / agg.pressPress.mean;
      cvCount += 1;
    }
  }
  if (cvCount === 0) return 0;

  const meanCv = cvTotal / cvCount;
  if (meanCv >= HUMAN_CV) return 100;
  if (meanCv <= BOT_CV) return 0;

  const position = (meanCv - BOT_CV) / (HUMAN_CV - BOT_CV);
  return clamp(Math.round(position * 100));
}

/**
 * Correction Rate (weight 0.15)
 * Scores the ratio of backspaces to total keystrokes.
 *
 * Mapping implemented below:
 * - no keystrokes → 0
 * - rate of exactly 0 → 10 (suspicious)
 * - (0, 2%) → ramps 10 → 50
 * - [2%, 15%] → 100
 * - (15%, 30%] → ramps 100 → 50
 * - above 30% → 50
 *
 * NOTE(review): the score steps from just under 50 to 100 at 2%; confirm
 * whether a 2–5% ramp was intended.
 *
 * @param metadata - Session counters; reads `totalKeystrokes` and
 *   `backspaceCount`.
 * @returns Score in 0–100.
 */
function scoreCorrectionRate(metadata: SessionMetadata): number {
  const keystrokes = metadata.totalKeystrokes;
  if (keystrokes === 0) return 0;

  const correctionShare = metadata.backspaceCount / keystrokes;

  if (correctionShare === 0) {
    return 10;
  }
  if (correctionShare < 0.02) {
    const rampUp = (correctionShare / 0.02) * 40;
    return clamp(Math.round(10 + rampUp));
  }
  if (correctionShare <= 0.15) {
    return 100;
  }
  if (correctionShare <= 0.30) {
    const rampDown = ((correctionShare - 0.15) / 0.15) * 50;
    return clamp(Math.round(100 - rampDown));
  }
  return 50;
}

/**
 * Pause Distribution (weight 0.20)
 * Presence and frequency of natural thinking pauses (> 500ms gaps).
 * Humans have pauses for thinking; bots type continuously.
 *
 * Scoring:
 * - no digraphs → 0; no recorded pauses → 5 (suspicious)
 * - pause rate (pauseCount / digraphs.length) in [0.03, 0.15] → base 80;
 *   below 0.03 the base ramps from 20 towards 80; above 0.15 it drops by
 *   30 points per additional 0.30 of pause rate
 * - +20 bonus when at least two pressPress gaps exceed 500 and their
 *   coefficient of variation (std / mean) is above 0.2
 *
 * @param metadata - Session counters; only `pauseCount` is read.
 * @param digraphs - Raw digraph timings; only `pressPress` is read.
 * @returns Score clamped to 0–100.
 */
function scorePauseDistribution(metadata: SessionMetadata, digraphs: RawDigraph[]): number {
  if (digraphs.length === 0) return 0;

  // No pauses at all → suspicious. Checked before scanning the gaps so the
  // early exit does no unnecessary work.
  if (metadata.pauseCount === 0) return 5;

  const pauseRate = metadata.pauseCount / digraphs.length;

  // Pause rate: ideal is 0.03-0.15 pauses per digraph
  let score: number;
  if (pauseRate < 0.03) {
    score = clamp(Math.round(20 + (pauseRate / 0.03) * 60));
  } else if (pauseRate <= 0.15) {
    score = 80;
  } else {
    score = clamp(Math.round(80 - ((pauseRate - 0.15) / 0.30) * 30));
  }

  // Long gaps are taken from the raw timings rather than from pauseCount.
  const longGaps = digraphs
    .map((d) => d.pressPress)
    .filter((pp) => pp > 500);

  let hasVariedPauses = false;
  if (longGaps.length >= 2) {
    // Every gap is > 500, so the mean is strictly positive.
    const meanGap = longGaps.reduce((s, v) => s + v, 0) / longGaps.length;
    hasVariedPauses = computeStd(longGaps) / meanGap > 0.2;
  }

  // Bonus for varied pause lengths (natural thinking)
  return hasVariedPauses ? clamp(score + 20) : score;
}

/**
 * Distribution Shape (weight 0.15)
 * Measures skewness of the pressPress timings.
 * Human typing is right-skewed (log-normal distribution).
 *
 * Skewness is the bias-adjusted sample estimate
 * `n / ((n - 1)(n - 2)) * Σ ((x - mean) / s)^3`, with `s` the sample standard
 * deviation from `computeStd`.
 *
 * Mapping:
 * - fewer than 10 samples → 50 (not enough data)
 * - zero spread → 0
 * - skewness ≤ 0 → 20 + 10 × skewness, clamped at 0
 * - (0, 0.5) → ramps 20 → 100
 * - [0.5, 3.0] → 100
 * - above 3.0 → loses 20 points per additional 5.0 of skewness
 *
 * @param digraphs - Raw digraph timings; only `pressPress` is read.
 * @returns Score in 0–100.
 */
function scoreDistributionShape(digraphs: RawDigraph[]): number {
  const ppValues = digraphs.map((d) => d.pressPress);
  const n = ppValues.length;
  if (n < 10) return 50; // not enough data (also keeps n - 2 positive)

  const mean = ppValues.reduce((s, v) => s + v, 0) / n;
  const std = computeStd(ppValues);
  if (std === 0) return 0;

  // Sum of cubed z-scores, then the small-sample correction factor.
  const cubedZSum = ppValues.reduce((s, v) => s + ((v - mean) / std) ** 3, 0);
  const skewness = (n / ((n - 1) * (n - 2))) * cubedZSum;

  // Human typing is right-skewed (positive skewness, typically 0.5-3.0)
  // Bot typing tends toward 0 skewness (symmetric) or negative
  if (skewness <= 0) return clamp(Math.round(20 + skewness * 10));
  if (skewness < 0.5) return clamp(Math.round(20 + (skewness / 0.5) * 80));
  if (skewness <= 3.0) return 100;
  // Very high skewness (> 3.0) still human-like but slightly less certain
  return clamp(Math.round(100 - ((skewness - 3.0) / 5.0) * 20));
}

/**
 * Flight Time Negativity (weight 0.15)
 * Scores the share of digraphs with releasePress < 0 (key overlap).
 * Humans overlap keys while typing; bots release before pressing next.
 *
 * Mapping of the negative share:
 * - no digraphs → 0
 * - share of exactly 0 → 5 (very suspicious)
 * - (0, 5%) → ramps 5 → 45
 * - [5%, 10%) → ramps 45 → 100
 * - [10%, 50%] → 100
 * - above 50% → loses 30 points per additional 30% (may just be fast typing)
 *
 * @param digraphs - Raw digraph timings; only `releasePress` is read.
 * @returns Score in 0–100.
 */
function scoreFlightTimeNegativity(digraphs: RawDigraph[]): number {
  const total = digraphs.length;
  if (total === 0) return 0;

  const overlapping = digraphs.reduce(
    (count, d) => (d.releasePress < 0 ? count + 1 : count),
    0,
  );
  const share = overlapping / total;

  if (share === 0) {
    return 5;
  }
  if (share < 0.05) {
    return clamp(Math.round(5 + (share / 0.05) * 40));
  }
  if (share < 0.10) {
    return clamp(Math.round(45 + ((share - 0.05) / 0.05) * 55));
  }
  if (share <= 0.50) {
    return 100;
  }
  return clamp(Math.round(100 - ((share - 0.50) / 0.30) * 30));
}

/**
 * Burst Patterns (weight 0.15)
 * Scores the average length and the variability of "bursts": runs of
 * consecutive digraphs whose pressPress gap is below 300.
 * Humans type in bursts of 3-15 keys with pauses between.
 *
 * The score is the sum of two components, each worth up to 50:
 * - average burst length: 50 for 3–15, proportionally less below 3, and
 *   decreasing to a floor of 20 above 15
 * - burst-length CV (std / mean): 50 above 0.5, a linear ramp between 0.1
 *   and 0.5, and a flat 5 at or below 0.1 (nearly uniform → suspicious)
 *
 * Returns 50 when there are fewer than 5 digraphs or only a single burst.
 *
 * @param digraphs - Raw digraph timings; treated as stored newest-first
 *   (assumption carried over from the original implementation — confirm
 *   against the producer).
 * @returns Score in 0–100.
 */
function scoreBurstPatterns(digraphs: RawDigraph[]): number {
  const NEUTRAL = 50;
  if (digraphs.length < 5) return NEUTRAL;

  // Walk from the last element towards the first, which visits the entries
  // in chronological order without copying the array. The oldest entry
  // opens the first burst, so the scan starts at the second-oldest.
  const burstLengths: number[] = [];
  let run = 1;
  for (let idx = digraphs.length - 2; idx >= 0; idx--) {
    if (digraphs[idx].pressPress < 300) {
      run += 1;
    } else {
      burstLengths.push(run);
      run = 1;
    }
  }
  burstLengths.push(run);

  if (burstLengths.length < 2) return NEUTRAL;

  let lengthTotal = 0;
  for (const len of burstLengths) lengthTotal += len;
  const meanLength = lengthTotal / burstLengths.length;
  const lengthStd = computeStd(burstLengths);
  const lengthCv = meanLength > 0 ? lengthStd / meanLength : 0;

  // Component 1: typical burst size.
  let sizePoints: number;
  if (meanLength < 3) {
    sizePoints = Math.round((meanLength / 3) * 50);
  } else if (meanLength <= 15) {
    sizePoints = 50;
  } else {
    sizePoints = Math.round(Math.max(20, 50 - ((meanLength - 15) / 10) * 30));
  }

  // Component 2: how much burst sizes vary.
  let varietyPoints: number;
  if (lengthCv > 0.5) {
    varietyPoints = 50;
  } else if (lengthCv > 0.1) {
    varietyPoints = Math.round(((lengthCv - 0.1) / 0.4) * 50);
  } else {
    varietyPoints = 5; // nearly uniform bursts → suspicious
  }

  return clamp(sizePoints + varietyPoints);
}

/**
 * Sample standard deviation (divides by n − 1).
 *
 * @param values - Numbers to summarise.
 * @returns The standard deviation, or 0 when fewer than two values are given.
 */
function computeStd(values: number[]): number {
  const sampleSize = values.length;
  if (sampleSize < 2) return 0;

  let total = 0;
  for (const value of values) total += value;
  const mean = total / sampleSize;

  let squaredDeviations = 0;
  for (const value of values) squaredDeviations += (value - mean) ** 2;

  return Math.sqrt(squaredDeviations / (sampleSize - 1));
}

/**
 * Relative weight of each sub-score in the overall humanness score.
 * The six weights add up to 1, so a weighted sum of 0–100 sub-scores stays
 * within 0–100.
 */
const WEIGHTS = {
  timingVariance: 0.20,
  correctionRate: 0.15,
  pauseDistribution: 0.20,
  distributionShape: 0.15,
  flightTimeNegativity: 0.15,
  burstPatterns: 0.15,
} as const;

/** Maps a humanness score to its verdict label (≥ 60 human, ≥ 40 uncertain). */
function verdictForScore(score: number): HumannessResult['verdict'] {
  if (score >= 60) return 'likely human';
  if (score >= 40) return 'uncertain';
  return 'likely bot';
}

/**
 * Combines the six sub-scores into an overall humanness score and verdict.
 *
 * The overall score is the rounded weighted sum of the sub-scores (weights
 * from `WEIGHTS`). When pasted characters exceed half of the total
 * keystrokes, the score is scaled down by 0.4 for every unit of paste ratio
 * above 0.5. The verdict is derived from the final, clamped score.
 *
 * @param digraphs - Raw digraph timings for the session.
 * @param metadata - Session counters (keystrokes, backspaces, pauses, pasted
 *   characters).
 * @returns The score (0–100), verdict and individual sub-scores, or `null`
 *   when fewer than 20 digraphs are available.
 */
export function analyzeHumanness(
  digraphs: RawDigraph[],
  metadata: SessionMetadata,
): HumannessResult | null {
  if (digraphs.length < 20) return null;

  const subScores = {
    timingVariance: scoreTimingVariance(digraphs),
    correctionRate: scoreCorrectionRate(metadata),
    pauseDistribution: scorePauseDistribution(metadata, digraphs),
    distributionShape: scoreDistributionShape(digraphs),
    flightTimeNegativity: scoreFlightTimeNegativity(digraphs),
    burstPatterns: scoreBurstPatterns(digraphs),
  };

  const weightedScore = Math.round(
    subScores.timingVariance * WEIGHTS.timingVariance +
    subScores.correctionRate * WEIGHTS.correctionRate +
    subScores.pauseDistribution * WEIGHTS.pauseDistribution +
    subScores.distributionShape * WEIGHTS.distributionShape +
    subScores.flightTimeNegativity * WEIGHTS.flightTimeNegativity +
    subScores.burstPatterns * WEIGHTS.burstPatterns,
  );

  // Factor in paste ratio as a penalty
  const pasteRatio = metadata.totalKeystrokes > 0
    ? metadata.pastedCharCount / metadata.totalKeystrokes
    : 0;

  const penalisedScore = pasteRatio > 0.5
    ? Math.round(weightedScore * (1 - (pasteRatio - 0.5) * 0.4))
    : weightedScore;

  // A very large paste ratio can push the penalised value below zero; the
  // clamp brings it back to 0, which falls in the same verdict band.
  const score = clamp(penalisedScore);

  return {
    score,
    verdict: verdictForScore(score),
    subScores,
  };
}