Fix bibliography titlecase (in clean_bibliography.sh) and a few things in the bibliography itself
This commit is contained in:
@@ -19,6 +19,8 @@ with open(path) as f:
|
||||
# Captures the arXiv identifier from a DOI field of the form
# "doi = {10.48550/arXiv.<id>}".
doi_re = re.compile(r"doi\s*=\s*\{10\.48550/arXiv\.([^}]+)\}")

# Captures the entry type name of an "@type{" header at the start of a line.
type_re = re.compile(r"^@([A-Za-z]+)\{", flags=re.MULTILINE)

# Matches the start of a line that opens a brace-delimited "howpublished" field.
howpublished_re = re.compile(r"^\s*howpublished\s*=\s*\{", flags=re.MULTILINE)

# Matches the opening "title = {" / "booktitle = {" of a field, in any letter
# case; the match ends just after the opening brace of the value.
title_field_re = re.compile(r"\b(title|booktitle)\s*=\s*\{", flags=re.IGNORECASE)

# Matches a brace group whose content is only ASCII letters and digits,
# capturing that content.
inner_brace_re = re.compile(r"\{([A-Za-z0-9]+)\}")
|
||||
|
||||
# Split into entries by scanning for top-level "@type{...}" blocks. We walk
|
||||
# brace depth so that the closing "}" of the entry is matched correctly even
|
||||
@@ -47,7 +49,7 @@ def split_entries(s):
|
||||
i = j
|
||||
return out
|
||||
|
||||
def transform(entry):
|
||||
def normalize_arxiv(entry):
|
||||
doi_m = doi_re.search(entry)
|
||||
if not doi_m:
|
||||
return entry
|
||||
@@ -64,6 +66,43 @@ def transform(entry):
|
||||
)
|
||||
return entry
|
||||
|
||||
# Strip protective braces around words inside title/booktitle values.
|
||||
# BibTeX uses "{Word}" inside titles to preserve case against the bibliography
|
||||
# style's title-casing rules. We keep that protection only when every character
|
||||
# inside the braces is non-lowercase (e.g. acronyms like {NASA}); for ordinary
|
||||
# words like {Quantum} we drop the braces so the style's casing applies.
|
||||
def _is_command_argument(value, start):
    """Return True when the "{" at value[start] is TeX markup, not case protection.

    That is the case when the brace directly follows a control word
    ("\\emph{..}"), a control symbol ("\\"{o}"), a backslash, or a math
    super-/subscript marker ("^{..}", "_{..}"). Removing such braces would
    change or break the LaTeX, so callers must leave them alone.
    """
    if start == 0:
        return False
    prev = value[start - 1]
    if prev in "^_\\":
        return True
    # Walk back over the ASCII letters of a possible control word.
    k = start
    while k > 0 and ("a" <= value[k - 1] <= "z" or "A" <= value[k - 1] <= "Z"):
        k -= 1
    if k < start and k > 0 and value[k - 1] == "\\":
        return True
    # Control symbol: a backslash followed by exactly one non-alphanumeric char.
    return start >= 2 and value[start - 2] == "\\" and not prev.isalnum()


def strip_title_braces(entry):
    """Drop case-protecting braces around ordinary words in title fields.

    Every "title = {" / "booktitle = {" field found by ``title_field_re`` is
    located, its value is delimited by walking brace depth, and inside that
    value each "{word}" group matched by ``inner_brace_re`` is replaced by the
    bare word when the word contains at least one lowercase character.
    Groups without any lowercase character (e.g. "{NASA}") keep their braces.

    Brace groups that are arguments of TeX markup ("\\emph{word}", "\\c{c}",
    "x^{ab}") are never touched: stripping them would corrupt the title
    rather than merely change its casing.

    Text outside title/booktitle values is returned unchanged. If a value's
    closing brace is missing, the rest of ``entry`` is treated as the value.
    """

    def unprotect(mm):
        word = mm.group(1)
        if not any(ch.islower() for ch in word):
            return mm.group(0)
        if _is_command_argument(mm.string, mm.start()):
            return mm.group(0)
        return word

    out, i, n = [], 0, len(entry)
    while True:
        m = title_field_re.search(entry, i)
        if not m:
            out.append(entry[i:])
            break
        # Copy everything up to and including the opening brace of the value.
        out.append(entry[i:m.end()])
        # Find the matching closing brace; depth starts at 1 for that opener.
        depth, j = 1, m.end()
        while j < n and depth > 0:
            c = entry[j]
            if c == "{":
                depth += 1
            elif c == "}":
                depth -= 1
                if depth == 0:
                    break
            j += 1
        value = entry[m.end():j]
        out.append(inner_brace_re.sub(unprotect, value))
        if j < n:
            # Re-emit the closing brace of the field value.
            out.append(entry[j])
        i = j + 1
    return "".join(out)
|
||||
|
||||
def transform(entry):
    """Run normalize_arxiv on the entry, then strip_title_braces on its result."""
    normalized = normalize_arxiv(entry)
    return strip_title_braces(normalized)
|
||||
|
||||
# Pieces tagged "entry" go through transform(); every other piece is copied
# through unchanged, and the results are concatenated in their original order.
parts = split_entries(text)
new_text = "".join(
    [body if tag != "entry" else transform(body) for tag, body in parts]
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user