123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
def _log_retention_banner(title):
    """Log *title* framed by two 70-character ``=`` rules."""
    logger.info("\n" + "=" * 70)
    logger.info(title)
    logger.info("=" * 70)


def _members_across_confs(cy_members, confs, year):
    """Return the union of the member sets of *confs* for *year*."""
    members = set()
    for conf in confs:
        # .get() keeps the defaultdict from growing empty entries on lookup.
        members |= cy_members.get((conf, year), set())
    return members


def _log_within_retention_by_year(within_conf_data, confs, years):
    """Log retained/current totals per year, pooled over *confs*."""
    for year in years:
        total_retained = 0
        total_curr = 0
        for conf in confs:
            for (_prev_year, curr_year), data in within_conf_data.get(conf, {}).items():
                if curr_year == year:
                    total_retained += data["retained"]
                    total_curr += data["curr_size"]
        # Years with no (previous, current) pair ending in them are skipped.
        if total_curr > 0:
            pct = total_retained / total_curr * 100
            logger.info(f" Year {year}: {total_retained}/{total_curr} = {pct:.1f}%")


def analyze_retention(all_results):
    """Log committee-member retention along three dimensions.

    The analysis covers (a) within-conference retention, (b) retention across
    different conferences of the same area, and (c) movement between the
    systems and security areas, followed by a summary compared against the
    EuroSys findings.  Every result is emitted through ``logger``; nothing is
    returned.

    Args:
        all_results: Input handed unchanged to ``build_member_map``.  The
            mapping it returns is iterated as ``name -> (conf, year)`` pairs;
            ``conf`` must be a string and ``year`` must support ``year - 1``.
    """
    member_map = build_member_map(all_results)

    # Per-conference-year member sets: {(CONF, year) -> set of normalized names}
    cy_members = defaultdict(set)
    for norm, conf_years in member_map.items():
        for conf, year in conf_years:
            cy_members[(conf, year)].add(norm)

    all_confs = sorted({c for c, _ in cy_members})
    all_years = sorted({y for _, y in cy_members})
    areas = [("SYSTEMS", SYSTEMS_CONFS), ("SECURITY", SECURITY_CONFS)]

    # ── (a) Within-conference retention ──────────────────────────────────
    # For each conference, what % of an edition's members also served on the
    # previous edition of THE SAME conference?
    _log_retention_banner("(a) WITHIN-CONFERENCE RETENTION (same conference, year Y-1 → Y)")
    within_conf_data = {}
    for conf in all_confs:
        conf_years = sorted(y for c, y in cy_members if c == conf)
        if len(conf_years) < 2:
            continue
        within_conf_data[conf] = {}
        # NOTE: pairs are consecutive *available* editions, so a gap in the
        # data compares editions more than one calendar year apart.
        for prev_year, curr_year in zip(conf_years, conf_years[1:]):
            prev_members = cy_members[(conf, prev_year)]
            curr_members = cy_members[(conf, curr_year)]
            retained = prev_members & curr_members
            pct = len(retained) / len(curr_members) * 100 if curr_members else 0
            within_conf_data[conf][(prev_year, curr_year)] = {
                "prev_size": len(prev_members),
                "curr_size": len(curr_members),
                "retained": len(retained),
                "retained_pct": pct,
            }
            logger.info(
                f" {conf} {prev_year}→{curr_year}: "
                f"{len(retained)}/{len(curr_members)} retained = {pct:.1f}% "
                f"(prev={len(prev_members)}, curr={len(curr_members)})"
            )

    # Aggregate within-conference retention by year
    logger.info("\n --- Aggregated within-conference retention by year ---")
    _log_within_retention_by_year(within_conf_data, all_confs, all_years)

    # Aggregate by area
    for area_label, area_confs in areas:
        logger.info(f"\n --- {area_label} within-conference retention by year ---")
        confs_in_area = [c for c in all_confs if c.lower() in area_confs]
        _log_within_retention_by_year(within_conf_data, confs_in_area, all_years)

    # ── (b) Cross-conference retention (same area) ───────────────────────
    # For each year, what % of members served on a DIFFERENT conference in
    # the same area during the previous calendar year?
    _log_retention_banner("(b) CROSS-CONFERENCE RETENTION (different conf, same area, Y-1 → Y)")
    for area_label, area_confs in areas:
        logger.info(f"\n --- {area_label} ---")
        area_conf_names = [c for c in all_confs if c.lower() in area_confs]
        for year in all_years:
            prev_year = year - 1
            curr_area_members = _members_across_confs(cy_members, area_conf_names, year)
            if not curr_area_members:
                continue
            prev_area_members = _members_across_confs(cy_members, area_conf_names, prev_year)

            # Everyone active in the area in both years ...
            retained = curr_area_members & prev_area_members
            # ... split into those who repeated at least one conference ...
            retained_same_conf = set()
            for conf in area_conf_names:
                retained_same_conf |= (
                    cy_members.get((conf, year), set())
                    & cy_members.get((conf, prev_year), set())
                )
            # ... and those who only appear on a different conference.
            retained_diff_conf = retained - retained_same_conf

            area_size = len(curr_area_members)
            total_retained = len(retained)
            newcomers = area_size - total_retained
            pct_same = len(retained_same_conf) / area_size * 100
            pct_diff = len(retained_diff_conf) / area_size * 100
            pct_total = total_retained / area_size * 100
            pct_new = newcomers / area_size * 100
            logger.info(
                f" {year}: total={area_size} "
                f"same-conf={len(retained_same_conf)} ({pct_same:.1f}%) "
                f"diff-conf={len(retained_diff_conf)} ({pct_diff:.1f}%) "
                f"total-retained={total_retained} ({pct_total:.1f}%) "
                f"newcomers={newcomers} ({pct_new:.1f}%)"
            )

    # ── (c) Cross-area retention ─────────────────────────────────────────
    # Members who served in BOTH systems and security conferences
    _log_retention_banner("(c) CROSS-AREA RETENTION (systems ↔ security)")

    # Per member: which areas did they ever serve in?
    cross_area_members = set()
    for norm, conf_years in member_map.items():
        member_areas = set()
        for conf, year in conf_years:
            area = _conf_area(f"{conf.lower()}{year}")
            if area in ("systems", "security"):
                member_areas.add(area)
        if len(member_areas) == 2:
            cross_area_members.add(norm)

    total_unique = len(member_map)
    # Guard the ratio: an empty member map would otherwise divide by zero.
    cross_area_pct = len(cross_area_members) / total_unique * 100 if total_unique else 0.0
    logger.info(f"\n Total unique members: {total_unique}")
    logger.info(f" Cross-area members (served in both systems + security): {len(cross_area_members)}")
    logger.info(f" Cross-area percentage: {cross_area_pct:.1f}%")

    # Resolve each conference's area once.  _conf_area takes a "<conf><year>"
    # key; a fixed placeholder year is used because the area doesn't depend
    # on the year.
    area_of = {conf: _conf_area(f"{conf.lower()}2020") for conf in all_confs}
    systems_conf_names = [c for c in all_confs if area_of[c] == "systems"]
    security_conf_names = [c for c in all_confs if area_of[c] == "security"]

    # Year-by-year cross-area analysis
    logger.info("\n --- Cross-area by year ---")
    for year in all_years:
        sys_members_year = _members_across_confs(cy_members, systems_conf_names, year)
        sec_members_year = _members_across_confs(cy_members, security_conf_names, year)
        if sys_members_year and sec_members_year:
            # Members serving in both areas within the same year.
            overlap = sys_members_year & sec_members_year
            overlap_pct = len(overlap) / len(sys_members_year | sec_members_year) * 100
            logger.info(
                f" {year}: systems={len(sys_members_year)}, security={len(sec_members_year)}, "
                f"overlap={len(overlap)} ({overlap_pct:.1f}%)"
            )
        elif sys_members_year or sec_members_year:
            logger.info(f" {year}: systems={len(sys_members_year)}, security={len(sec_members_year)}, overlap=0")

    # Cross-area year-over-year: members in area X in year Y who were in area Z in year Y-1
    logger.info("\n --- Cross-area mobility (area X this year, area Y last year) ---")
    for year in all_years:
        prev_year = year - 1
        sys_curr = _members_across_confs(cy_members, systems_conf_names, year)
        sys_prev = _members_across_confs(cy_members, systems_conf_names, prev_year)
        sec_curr = _members_across_confs(cy_members, security_conf_names, year)
        sec_prev = _members_across_confs(cy_members, security_conf_names, prev_year)
        # Set difference binds tighter than intersection; the parentheses
        # only make the existing evaluation order explicit.
        # Security→Systems: in systems now, in security but not systems last year.
        sec_to_sys = sys_curr & (sec_prev - sys_prev)
        # Systems→Security: in security now, in systems but not security last year.
        sys_to_sec = sec_curr & (sys_prev - sec_prev)
        if sys_curr or sec_curr:
            logger.info(f" {year}: sec→sys={len(sec_to_sys)}, sys→sec={len(sys_to_sec)}")

    # ── Summary comparison with EuroSys ──────────────────────────────────
    _log_retention_banner("SUMMARY: Comparison with EuroSys findings (D'Elia et al., ACM REP '25)")
    logger.info(
        "\n"
        'EuroSys claim: "the number of returning members remains relatively small,\n'
        'suggesting that most AEC members are new." (Based on EuroSys-only data, Table 4b)\n'
        "Our findings:\n"
    )

    # EuroSys-specific retention, if available.  Section (a) already computed
    # these pairs; an entry exists only when EuroSys has at least two editions.
    eurosys_pairs = within_conf_data.get("EUROSYS")
    if eurosys_pairs:
        logger.info(" EuroSys within-conference retention (for direct comparison):")
        for (prev_year, curr_year), data in eurosys_pairs.items():
            if data["curr_size"]:
                logger.info(
                    f" {prev_year}→{curr_year}: {data['retained']}/{data['curr_size']} "
                    f"= {data['retained_pct']:.1f}%"
                )

    # Overall within-conference average (unweighted mean over edition pairs)
    all_within = [
        data["retained_pct"]
        for pairs in within_conf_data.values()
        for data in pairs.values()
        if data["curr_size"] > 10  # skip tiny committees
    ]
    if all_within:
        avg_within = sum(all_within) / len(all_within)
        logger.info(f"\n Average within-conference retention: {avg_within:.1f}%")

    # Average cross-conference (different conf, same area)
    logger.info("\n Key insight: Are people switching conferences rather than leaving entirely?")
|