Clean up architecture
This commit is contained in:
12
tools/brittle-seed/Cargo.toml
Normal file
12
tools/brittle-seed/Cargo.toml
Normal file
@@ -0,0 +1,12 @@
|
||||
[package]
|
||||
name = "brittle-seed"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[[bin]]
|
||||
name = "brittle-seed"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
brittle-core = { path = "../../brittle-core" }
|
||||
ureq = "2"
|
||||
286
tools/brittle-seed/src/main.rs
Normal file
286
tools/brittle-seed/src/main.rs
Normal file
@@ -0,0 +1,286 @@
|
||||
//! Creates an example Brittle repository with realistic academic references.
|
||||
//!
|
||||
//! For references that have freely available PDFs (arXiv preprints and open
|
||||
//! author copies), the script downloads the PDF and attaches it to the
|
||||
//! reference. Downloads that fail are skipped with a warning so the seed
|
||||
//! always completes even without network access.
|
||||
//!
|
||||
//! Usage:
|
||||
//! brittle-seed [PATH]
|
||||
//!
|
||||
//! PATH defaults to `~/brittle-example`. The directory must not already
|
||||
//! contain a git repository.
|
||||
|
||||
use std::io::Read;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use brittle_core::{Brittle, EntryType, FsStore, Person, ReferenceId};
|
||||
|
||||
fn main() {
|
||||
let path = match std::env::args().nth(1) {
|
||||
Some(p) => PathBuf::from(p),
|
||||
None => {
|
||||
let home = std::env::var("HOME").expect("HOME not set");
|
||||
PathBuf::from(home).join("brittle-example")
|
||||
}
|
||||
};
|
||||
|
||||
if path.join(".git").exists() {
|
||||
eprintln!("error: {} already contains a git repository", path.display());
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
std::fs::create_dir_all(&path).expect("could not create directory");
|
||||
|
||||
println!("Creating repository at {} …", path.display());
|
||||
let mut b = Brittle::create(&path).expect("create repository");
|
||||
|
||||
// ── Libraries ─────────────────────────────────────────────────────────────
|
||||
|
||||
let cs = b.create_library("Computer Science", None).unwrap();
|
||||
let ml = b.create_library("Machine Learning", Some(cs.id)).unwrap();
|
||||
let sys = b.create_library("Systems", Some(cs.id)).unwrap();
|
||||
let math = b.create_library("Mathematics", None).unwrap();
|
||||
let pl = b.create_library("Programming Languages", Some(cs.id)).unwrap();
|
||||
|
||||
// ── References ────────────────────────────────────────────────────────────
|
||||
|
||||
// -- Machine Learning --
|
||||
|
||||
let mut r = b.create_reference("lecun1998gradient", EntryType::Article).unwrap();
|
||||
r.authors = vec![
|
||||
person("LeCun", "Yann"),
|
||||
person("Bottou", "Léon"),
|
||||
person("Bengio", "Yoshua"),
|
||||
person("Haffner", "Patrick"),
|
||||
];
|
||||
r.fields.insert("title".into(), "Gradient-based learning applied to document recognition".into());
|
||||
r.fields.insert("journal".into(), "Proceedings of the IEEE".into());
|
||||
r.fields.insert("volume".into(), "86".into());
|
||||
r.fields.insert("number".into(), "11".into());
|
||||
r.fields.insert("pages".into(), "2278--2324".into());
|
||||
r.fields.insert("year".into(), "1998".into());
|
||||
let id = r.id;
|
||||
b.update_reference(r).unwrap();
|
||||
b.add_to_library(ml.id, id).unwrap();
|
||||
attach_pdf(&mut b, id, "http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf");
|
||||
|
||||
let mut r = b.create_reference("vaswani2017attention", EntryType::InProceedings).unwrap();
|
||||
r.authors = vec![
|
||||
person("Vaswani", "Ashish"),
|
||||
person("Shazeer", "Noam"),
|
||||
person("Parmar", "Niki"),
|
||||
person("Uszkoreit", "Jakob"),
|
||||
person("Jones", "Llion"),
|
||||
person("Gomez", "Aidan N."),
|
||||
person("Kaiser", "Łukasz"),
|
||||
person("Polosukhin", "Illia"),
|
||||
];
|
||||
r.fields.insert("title".into(), "Attention Is All You Need".into());
|
||||
r.fields.insert("booktitle".into(), "Advances in Neural Information Processing Systems".into());
|
||||
r.fields.insert("volume".into(), "30".into());
|
||||
r.fields.insert("year".into(), "2017".into());
|
||||
let id = r.id;
|
||||
b.update_reference(r).unwrap();
|
||||
b.add_to_library(ml.id, id).unwrap();
|
||||
attach_pdf(&mut b, id, "https://arxiv.org/pdf/1706.03762");
|
||||
|
||||
let mut r = b.create_reference("goodfellow2016deep", EntryType::Book).unwrap();
|
||||
r.authors = vec![
|
||||
person("Goodfellow", "Ian"),
|
||||
person("Bengio", "Yoshua"),
|
||||
person("Courville", "Aaron"),
|
||||
];
|
||||
r.fields.insert("title".into(), "Deep Learning".into());
|
||||
r.fields.insert("publisher".into(), "MIT Press".into());
|
||||
r.fields.insert("year".into(), "2016".into());
|
||||
r.fields.insert("url".into(), "http://www.deeplearningbook.org".into());
|
||||
let id = r.id;
|
||||
b.update_reference(r).unwrap();
|
||||
b.add_to_library(ml.id, id).unwrap();
|
||||
// No freely available PDF for this book.
|
||||
|
||||
let mut r = b.create_reference("ho2020denoising", EntryType::InProceedings).unwrap();
|
||||
r.authors = vec![
|
||||
person("Ho", "Jonathan"),
|
||||
person("Jain", "Ajay"),
|
||||
person("Abbeel", "Pieter"),
|
||||
];
|
||||
r.fields.insert("title".into(), "Denoising Diffusion Probabilistic Models".into());
|
||||
r.fields.insert("booktitle".into(), "Advances in Neural Information Processing Systems".into());
|
||||
r.fields.insert("volume".into(), "33".into());
|
||||
r.fields.insert("pages".into(), "6840--6851".into());
|
||||
r.fields.insert("year".into(), "2020".into());
|
||||
let id = r.id;
|
||||
b.update_reference(r).unwrap();
|
||||
b.add_to_library(ml.id, id).unwrap();
|
||||
attach_pdf(&mut b, id, "https://arxiv.org/pdf/2006.11239");
|
||||
|
||||
// -- Systems --
|
||||
|
||||
let mut r = b.create_reference("lamport1978time", EntryType::Article).unwrap();
|
||||
r.authors = vec![person("Lamport", "Leslie")];
|
||||
r.fields.insert("title".into(), "Time, Clocks, and the Ordering of Events in a Distributed System".into());
|
||||
r.fields.insert("journal".into(), "Communications of the ACM".into());
|
||||
r.fields.insert("volume".into(), "21".into());
|
||||
r.fields.insert("number".into(), "7".into());
|
||||
r.fields.insert("pages".into(), "558--565".into());
|
||||
r.fields.insert("year".into(), "1978".into());
|
||||
let id = r.id;
|
||||
b.update_reference(r).unwrap();
|
||||
b.add_to_library(sys.id, id).unwrap();
|
||||
attach_pdf(&mut b, id, "https://lamport.azurewebsites.net/pubs/time-clocks.pdf");
|
||||
|
||||
let mut r = b.create_reference("rosenblum1992lfs", EntryType::Article).unwrap();
|
||||
r.authors = vec![
|
||||
person("Rosenblum", "Mendel"),
|
||||
person("Ousterhout", "John K."),
|
||||
];
|
||||
r.fields.insert("title".into(), "The Design and Implementation of a Log-Structured File System".into());
|
||||
r.fields.insert("journal".into(), "ACM Transactions on Computer Systems".into());
|
||||
r.fields.insert("volume".into(), "10".into());
|
||||
r.fields.insert("number".into(), "1".into());
|
||||
r.fields.insert("pages".into(), "26--52".into());
|
||||
r.fields.insert("year".into(), "1992".into());
|
||||
let id = r.id;
|
||||
b.update_reference(r).unwrap();
|
||||
b.add_to_library(sys.id, id).unwrap();
|
||||
// Paywalled; no freely available PDF.
|
||||
|
||||
let mut r = b.create_reference("dean2004mapreduce", EntryType::InProceedings).unwrap();
|
||||
r.authors = vec![
|
||||
person("Dean", "Jeffrey"),
|
||||
person("Ghemawat", "Sanjay"),
|
||||
];
|
||||
r.fields.insert("title".into(), "MapReduce: Simplified Data Processing on Large Clusters".into());
|
||||
r.fields.insert("booktitle".into(), "OSDI".into());
|
||||
r.fields.insert("pages".into(), "137--150".into());
|
||||
r.fields.insert("year".into(), "2004".into());
|
||||
let id = r.id;
|
||||
b.update_reference(r).unwrap();
|
||||
b.add_to_library(sys.id, id).unwrap();
|
||||
attach_pdf(&mut b, id, "https://static.googleusercontent.com/media/research.google.com/en//archive/mapreduce-osdi04.pdf");
|
||||
|
||||
// -- Programming Languages --
|
||||
|
||||
let mut r = b.create_reference("milner1978polymorphism", EntryType::Article).unwrap();
|
||||
r.authors = vec![person("Milner", "Robin")];
|
||||
r.fields.insert("title".into(), "A Theory of Type Polymorphism in Programming".into());
|
||||
r.fields.insert("journal".into(), "Journal of Computer and System Sciences".into());
|
||||
r.fields.insert("volume".into(), "17".into());
|
||||
r.fields.insert("number".into(), "3".into());
|
||||
r.fields.insert("pages".into(), "348--375".into());
|
||||
r.fields.insert("year".into(), "1978".into());
|
||||
let id = r.id;
|
||||
b.update_reference(r).unwrap();
|
||||
b.add_to_library(pl.id, id).unwrap();
|
||||
// Paywalled; no freely available PDF.
|
||||
|
||||
let mut r = b.create_reference("matsakis2014rust", EntryType::InProceedings).unwrap();
|
||||
r.authors = vec![
|
||||
person("Matsakis", "Nicholas D."),
|
||||
person("Klock", "Felix S."),
|
||||
];
|
||||
r.fields.insert("title".into(), "The Rust Language".into());
|
||||
r.fields.insert("booktitle".into(), "ACM SIGAda Annual Conference on High Integrity Language Technology".into());
|
||||
r.fields.insert("pages".into(), "103--104".into());
|
||||
r.fields.insert("year".into(), "2014".into());
|
||||
let id = r.id;
|
||||
b.update_reference(r).unwrap();
|
||||
b.add_to_library(pl.id, id).unwrap();
|
||||
// Paywalled; no freely available PDF.
|
||||
|
||||
// -- Mathematics --
|
||||
|
||||
let mut r = b.create_reference("turing1936computable", EntryType::Article).unwrap();
|
||||
r.authors = vec![person("Turing", "Alan M.")];
|
||||
r.fields.insert("title".into(), "On Computable Numbers, with an Application to the Entscheidungsproblem".into());
|
||||
r.fields.insert("journal".into(), "Proceedings of the London Mathematical Society".into());
|
||||
r.fields.insert("volume".into(), "42".into());
|
||||
r.fields.insert("number".into(), "1".into());
|
||||
r.fields.insert("pages".into(), "230--265".into());
|
||||
r.fields.insert("year".into(), "1936".into());
|
||||
let id = r.id;
|
||||
b.update_reference(r).unwrap();
|
||||
b.add_to_library(math.id, id).unwrap();
|
||||
// No freely available PDF.
|
||||
|
||||
let mut r = b.create_reference("knuth1984texbook", EntryType::Book).unwrap();
|
||||
r.authors = vec![person("Knuth", "Donald E.")];
|
||||
r.fields.insert("title".into(), "The TeXbook".into());
|
||||
r.fields.insert("publisher".into(), "Addison-Wesley".into());
|
||||
r.fields.insert("year".into(), "1984".into());
|
||||
r.fields.insert("series".into(), "Computers and Typesetting".into());
|
||||
r.fields.insert("volume".into(), "A".into());
|
||||
let id = r.id;
|
||||
b.update_reference(r).unwrap();
|
||||
b.add_to_library(math.id, id).unwrap();
|
||||
// Copyrighted book; no freely available PDF.
|
||||
|
||||
// A reference in both ML and Mathematics (cross-library membership).
|
||||
let mut r = b.create_reference("cybenko1989approximation", EntryType::Article).unwrap();
|
||||
r.authors = vec![person("Cybenko", "George")];
|
||||
r.fields.insert("title".into(), "Approximation by Superpositions of a Sigmoidal Function".into());
|
||||
r.fields.insert("journal".into(), "Mathematics of Control, Signals, and Systems".into());
|
||||
r.fields.insert("volume".into(), "2".into());
|
||||
r.fields.insert("number".into(), "4".into());
|
||||
r.fields.insert("pages".into(), "303--314".into());
|
||||
r.fields.insert("year".into(), "1989".into());
|
||||
let id = r.id;
|
||||
b.update_reference(r).unwrap();
|
||||
b.add_to_library(ml.id, id).unwrap();
|
||||
b.add_to_library(math.id, id).unwrap();
|
||||
// Paywalled; no freely available PDF.
|
||||
|
||||
println!();
|
||||
println!("Done.");
|
||||
println!();
|
||||
println!(" Libraries : Computer Science (Machine Learning, Systems, Programming Languages), Mathematics");
|
||||
println!(" References: 12 across all libraries");
|
||||
println!();
|
||||
println!("Open the repository in Brittle with: :open {}", path.display());
|
||||
}
|
||||
|
||||
// ── PDF download ──────────────────────────────────────────────────────────────
|
||||
|
||||
/// Download the PDF at `url` and attach it to `id`. Prints progress and
|
||||
/// skips silently on any error so the seed always completes.
|
||||
fn attach_pdf(b: &mut Brittle<FsStore>, id: ReferenceId, url: &str) {
|
||||
let label = url.rsplit('/').next().unwrap_or(url);
|
||||
print!(" ↓ {label} … ");
|
||||
std::io::Write::flush(&mut std::io::stdout()).ok();
|
||||
|
||||
match download(url) {
|
||||
Err(e) => println!("skipped ({e})"),
|
||||
Ok(bytes) => {
|
||||
let tmp = std::env::temp_dir().join(format!("{id}.pdf"));
|
||||
if let Err(e) = std::fs::write(&tmp, &bytes) {
|
||||
println!("skipped (write: {e})");
|
||||
return;
|
||||
}
|
||||
match b.attach_pdf(id, &tmp) {
|
||||
Ok(_) => println!("{} KB", bytes.len() / 1024),
|
||||
Err(e) => println!("skipped (attach: {e})"),
|
||||
}
|
||||
let _ = std::fs::remove_file(&tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn download(url: &str) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
|
||||
let resp = ureq::get(url).call()?;
|
||||
let mut buf = Vec::new();
|
||||
resp.into_reader().read_to_end(&mut buf)?;
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
fn person(family: &str, given: &str) -> Person {
|
||||
Person {
|
||||
family: family.into(),
|
||||
given: Some(given.into()),
|
||||
prefix: None,
|
||||
suffix: None,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user