# Source code for bib_cleaner.bib_cleaner
"""Main module."""
import re
def _extract_tag(header_line):
    """Return the citation key from an entry header such as ``@article{key,``.

    The key is the text between the first ``{`` and the following ``,``.
    If the comma is not on the header line, everything after the brace is
    used, so the key is never truncated. Surrounding whitespace is removed.
    """
    _, _, after_brace = header_line.partition("{")
    key, _, _ = after_brace.partition(",")
    return key.strip()


def get_minimal_bib(master_bib, all_tags):
    """Get the minimal bib file contents.

    The master file is scanned line by line. A line starting with ``@``
    opens an entry and a line starting with ``}`` closes it; the entry is
    kept only if its citation key is one of ``all_tags``.

    Parameters
    ----------
    master_bib : str
        Name of master bibliography file which has all citations.
    all_tags : List[str]
        List of all tags from the TeX files. (where tags are the names that
        go in ``\\cite{}`` commands in TeX files.)

    Returns
    -------
    new_contents : str
        Contents of the cleaned bib file.
    n_used_bibs : int
        Number of bib entries used in TeX files.
    n_total_bibs : int
        Number of lines starting with ``@`` in `master_bib`.
    len_contents : int
        Number of pieces obtained by splitting `master_bib` on ``"\\n"``
        (a trailing newline therefore adds one to the count).
    """
    # Sets give O(1) membership tests and feed the final difference directly.
    wanted_tags = set(all_tags)
    found_tags = set()

    n_total_bibs = 0
    n_used_bibs = 0
    kept_entries = []

    # Extract contents of master bib file
    with open(master_bib, encoding="utf-8") as f:
        contents = f.read().split("\n")

    entry_lines = []  # lines buffered since the last closing brace
    this_tag = None  # key of the entry currently being read
    in_entry = False

    for line in contents:
        # Skip blank lines
        if not line:
            continue

        first_char = line[0]
        if first_char == "@":
            # The buffer is deliberately not reset here: lines of a previous
            # entry that never reached a closing-brace line (e.g. a one-line
            # ``@string{...}``) are emitted with the next entry that closes.
            in_entry = True
            n_total_bibs += 1
            this_tag = _extract_tag(line)
        elif first_char == "}":
            # Only a brace that closes an open entry may emit output; a stray
            # brace before the first entry or after a closed one is ignored.
            if in_entry and this_tag in wanted_tags:
                entry_lines.append("}\n")
                kept_entries.append("".join(entry_lines))
                found_tags.add(this_tag)
                n_used_bibs += 1
            entry_lines = []
            in_entry = False
            continue

        if in_entry:
            entry_lines.append(line + "\n")

    diff = wanted_tags - found_tags
    if len(diff) > 0:
        print(
            f"You used the following {len(diff)} citations in your TeX files, but they are not in {master_bib}!"
        )
        print(diff)
    else:
        print(f"All citations from TeX files were found in {master_bib}")

    new_contents = "".join(kept_entries)
    len_contents = len(contents)
    return (new_contents, n_used_bibs, n_total_bibs, len_contents)