# Replace non-ASCII characters in known author names with LaTeX accent
# macros. The "g" flag converts every occurrence on a line, not just the
# first (several authors can share one "author = {...}" line).
sed -i "s/Świerkowska/{\\\\'S}wierkowska/g" bibliography.bib
sed -i "s/Héctor/H{\\\\'e}ctor/g" bibliography.bib
sed -i "s/Bombín/Bomb{\\\\'i}n/g" bibliography.bib
sed -i "s/Zémor/Z{\\\\'e}mor/g" bibliography.bib

# Drop fields that should not reach the bibliography. -z makes sed treat the
# whole file as a single record so the pattern can consume the trailing
# newline; the value pattern is written for values with at most one level of
# nested braces.
sed -Ezi "s/\s(abstract|note|urldate|url|keywords|file) = \{[^}]*(\{[^}]*\}[^}]*)*\},?\n//g" bibliography.bib

# Normalize arXiv-only entries to @misc with howpublished = {arXiv:<id>}.
# Detection: doi matches 10.48550/arXiv.<id>. The IEEEtranSA .bst's @article
# handler needs a journal field (which preprints lack) and ignores publisher,
# so for arXiv preprints we coerce the type to @misc and add howpublished
# (the field the .bst actually prints for @misc).
python3 - <<'PY'
import re

path = "bibliography.bib"
with open(path) as f:
    text = f.read()

# DOIs and BibTeX field names are case-insensitive, and some exporters emit
# "DOI = {10.48550/ARXIV.<id>}", so match without regard to case. Group 1 is
# the arXiv identifier exactly as written in the file.
doi_re = re.compile(r"doi\s*=\s*\{10\.48550/arXiv\.([^}]+)\}", re.IGNORECASE)
# Line-initial "@type{" header that opens an entry.
type_re = re.compile(r"^@([A-Za-z]+)\{", re.MULTILINE)
# An existing howpublished field (any capitalization) at the start of a line.
howpublished_re = re.compile(
    r"^\s*howpublished\s*=\s*\{", re.MULTILINE | re.IGNORECASE
)
# Opening of a title or booktitle field value.
title_field_re = re.compile(r"\b(title|booktitle)\s*=\s*\{", re.IGNORECASE)
# A brace group wrapping a single alphanumeric word, e.g. "{Quantum}".
inner_brace_re = re.compile(r"\{([A-Za-z0-9]+)\}")

# Split into entries by scanning for top-level "@type{...}" blocks. We walk
# brace depth so that the closing "}" of the entry is matched correctly even
# if internal fields contain braces.
def split_entries(s):
    """Split BibTeX source into a list of (kind, chunk) pairs.

    kind is "entry" for a chunk that starts at a "@type{" header (as matched
    by type_re) and runs through the brace that balances it, and "text" for
    everything between entries. Concatenating the chunks in order reproduces
    s exactly. An entry whose braces never balance extends to the end of s.
    """
    out, i, n = [], 0, len(s)
    while i < n:
        m = type_re.search(s, i)
        if not m:
            out.append(("text", s[i:]))
            break
        if m.start() > i:
            out.append(("text", s[i:m.start()]))
        # Walk brace depth from the "@" so nested braces inside field values
        # do not end the entry early.
        depth, j = 0, m.start()
        while j < n:
            c = s[j]
            if c == "{":
                depth += 1
            elif c == "}":
                depth -= 1
                if depth == 0:
                    j += 1  # include the entry-closing brace
                    break
            j += 1
        out.append(("entry", s[m.start():j]))
        i = j
    return out


def normalize_arxiv(entry):
    """Coerce an entry carrying an arXiv DOI to @misc with howpublished.

    Entries in which doi_re finds no match are returned unchanged. Otherwise
    the entry type is rewritten to @misc and, unless a howpublished field is
    already present, "howpublished = {arXiv:<id>}" is appended as the last
    field, using the identifier captured from the DOI.
    """
    doi_m = doi_re.search(entry)
    if not doi_m:
        return entry
    arxiv_id = doi_m.group(1)
    entry = type_re.sub("@misc{", entry, count=1)
    if not howpublished_re.search(entry):
        # Insert howpublished before the entry-closing "}". Group 1 is the
        # optional trailing comma of the previous field and group 2 the
        # whitespace before the brace; a comma is always emitted so the
        # previous field is properly terminated either way.
        entry = re.sub(
            r"(,?)(\s*)\}\s*$",
            lambda m: ","
            + m.group(2)
            + "\thowpublished = {arXiv:"
            + arxiv_id
            + "},\n}",
            entry,
            count=1,
        )
    return entry


# A single-word brace group, optionally preceded by something that makes the
# group an argument rather than a case-protection group: a control word
# (\emph, \v, ...), a control symbol (\', \", ...), or a math sub/superscript
# marker. Group 1 is that prefix (None when absent); group 2 is the word.
_word_group_re = re.compile(
    r"(\\[A-Za-z]+\s*|\\[^A-Za-z]|[_^])?\{([A-Za-z0-9]+)\}"
)


def _unprotect_word(mm):
    """Return the replacement text for one _word_group_re match."""
    if mm.group(1) is not None:
        # "{...}" is a macro argument or a sub/superscript group; removing
        # the braces would change the markup (\emph{word} -> \emphword).
        return mm.group(0)
    word = mm.group(2)
    if any(ch.islower() for ch in word):
        return word
    return mm.group(0)


# Strip protective braces around words inside title/booktitle values.
# BibTeX uses "{Word}" inside titles to preserve case against the bibliography
# style's title-casing rules. We keep that protection only when every character
# inside the braces is non-lowercase (e.g. acronyms like {NASA}); for ordinary
# words like {Quantum} we drop the braces so the style's casing applies.
# Brace groups that directly follow a control sequence, "_" or "^" are never
# touched because they are arguments, not case protection.
def strip_title_braces(entry):
    """Return entry with case-protecting braces removed from title fields.

    Only the values of fields matched by title_field_re are rewritten; all
    other text is copied through unchanged.
    """
    out, i, n = [], 0, len(entry)
    while True:
        m = title_field_re.search(entry, i)
        if not m:
            out.append(entry[i:])
            break
        # Copy everything up to and including the value's opening brace.
        out.append(entry[i:m.end()])
        # Find the brace that closes the field value (depth starts at 1
        # because the opening brace has already been consumed).
        depth, j = 1, m.end()
        while j < n and depth > 0:
            c = entry[j]
            if c == "{":
                depth += 1
            elif c == "}":
                depth -= 1
                if depth == 0:
                    break
            j += 1
        value = entry[m.end():j]
        out.append(_word_group_re.sub(_unprotect_word, value))
        if j < n:
            out.append(entry[j])  # the value's closing brace
        i = j + 1
    return "".join(out)


def transform(entry):
    """Apply the arXiv normalization, then the title brace clean-up."""
    return strip_title_braces(normalize_arxiv(entry))


# Rewrite the file in place: entries are transformed, text between entries is
# passed through untouched.
parts = split_entries(text)
new_text = "".join(transform(p) if kind == "entry" else p for kind, p in parts)
with open(path, "w") as f:
    f.write(new_text)
PY