Start implementing snowballing functionality; add logging; properly sanitize abstracts
This commit is contained in:
parent
14f503a554
commit
4901a2897f
81
Cargo.lock
generated
81
Cargo.lock
generated
@ -494,6 +494,29 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_filter"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2"
|
||||
dependencies = [
|
||||
"log",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.11.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
"env_filter",
|
||||
"jiff",
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.2"
|
||||
@ -724,6 +747,15 @@ version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
||||
|
||||
[[package]]
|
||||
name = "html-escape"
|
||||
version = "0.2.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476"
|
||||
dependencies = [
|
||||
"utf8-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.35.0"
|
||||
@ -1059,6 +1091,30 @@ version = "1.0.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
|
||||
|
||||
[[package]]
|
||||
name = "jiff"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a87d9b8105c23642f50cbbae03d1f75d8422c5cb98ce7ee9271f7ff7505be6b8"
|
||||
dependencies = [
|
||||
"jiff-static",
|
||||
"log",
|
||||
"portable-atomic",
|
||||
"portable-atomic-util",
|
||||
"serde_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jiff-static"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b787bebb543f8969132630c51fd0afab173a86c6abae56ff3b9e5e3e3f9f6e58"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.111",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.83"
|
||||
@ -1549,6 +1605,15 @@ version = "1.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950"
|
||||
|
||||
[[package]]
|
||||
name = "portable-atomic-util"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
|
||||
dependencies = [
|
||||
"portable-atomic",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "potential_utf"
|
||||
version = "0.1.4"
|
||||
@ -1793,6 +1858,9 @@ dependencies = [
|
||||
"ammonia",
|
||||
"clap",
|
||||
"crossterm",
|
||||
"env_logger",
|
||||
"html-escape",
|
||||
"log",
|
||||
"open",
|
||||
"ratatui",
|
||||
"reqwest",
|
||||
@ -1800,6 +1868,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"textwrap",
|
||||
"tokio",
|
||||
"unicode-general-category",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2495,6 +2564,12 @@ version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-general-category"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b993bddc193ae5bd0d623b49ec06ac3e9312875fdae725a975c51db1cc1677f"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.22"
|
||||
@ -2554,6 +2629,12 @@ version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||
|
||||
[[package]]
|
||||
name = "utf8-width"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1292c0d970b54115d14f2492fe0170adf21d68a1de108eebc51c1df4f346a091"
|
||||
|
||||
[[package]]
|
||||
name = "utf8_iter"
|
||||
version = "1.0.4"
|
||||
|
||||
@ -7,6 +7,9 @@ edition = "2024"
|
||||
ammonia = "4.1.2"
|
||||
clap = { version = "4.5.53", features = ["derive"] }
|
||||
crossterm = "0.29.0"
|
||||
env_logger = "0.11.8"
|
||||
html-escape = "0.2.13"
|
||||
log = "0.4.29"
|
||||
open = "5.3.3"
|
||||
ratatui = "0.30.0"
|
||||
reqwest = { version = "0.12.28", features = ["json"] }
|
||||
@ -14,3 +17,4 @@ serde = { version = "1.0.228", features = ["derive"] }
|
||||
serde_json = "1.0.148"
|
||||
textwrap = "0.16.2"
|
||||
tokio = { version = "1.48.0", features = ["full"] }
|
||||
unicode-general-category = "1.1.0"
|
||||
|
||||
78
src/app.rs
78
src/app.rs
@ -1,7 +1,11 @@
|
||||
use core::panic;
|
||||
|
||||
use ratatui::{crossterm::event::KeyCode, widgets::ListState};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::snowballing::Publication;
|
||||
use crate::snowballing::{Publication, get_publication_by_id};
|
||||
|
||||
use log::{debug, info, warn, error};
|
||||
|
||||
#[derive(Serialize, Deserialize, Default, PartialEq)]
|
||||
pub enum ActivePane {
|
||||
@ -20,8 +24,8 @@ pub enum ActiveTab {
|
||||
#[derive(Serialize, Deserialize, Default)]
|
||||
pub enum SnowballingStep {
|
||||
#[default]
|
||||
Forward,
|
||||
Backward,
|
||||
Forward,
|
||||
}
|
||||
|
||||
impl ToString for SnowballingStep {
|
||||
@ -104,14 +108,29 @@ pub struct App {
|
||||
pub should_quit: bool,
|
||||
}
|
||||
|
||||
// TODO: Implement exclusion and inclusion of papers (e.g., X and Y chars)
|
||||
// TODO: Implement moving through steps and iterations (populating pending papers)
|
||||
// TODO: Implement input of seed papers using IDs
|
||||
// TODO: Implement exclusion and inclusion of papers (e.g., X and Y chars)
|
||||
// TODO: Implement possibility of pushing excluded papers back into pending
|
||||
// TODO: Implement export of included papers as csv for keywording with a spreadsheet
|
||||
// TODO: Implement export of included papers into zotero (Use RIS format somehow)
|
||||
impl App {
|
||||
pub fn handle_key(&mut self, key: KeyCode) {
|
||||
// TODO: Show error somehow
|
||||
pub async fn add_seed_paper(&mut self, api_link: &String) {
|
||||
let publ =
|
||||
get_publication_by_id(api_link, "an.tsouchlos@gmail.com").await;
|
||||
|
||||
match publ {
|
||||
Ok(publ) => self.included_publications.push(publ),
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"Failed to get publication metadata using OpenAlex API: {}",
|
||||
err
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn handle_key(&mut self, key: KeyCode) {
|
||||
if KeyCode::Esc == key {
|
||||
self.should_quit = true;
|
||||
return;
|
||||
@ -123,7 +142,7 @@ impl App {
|
||||
self.active_tab = ActiveTab::Snowballing;
|
||||
}
|
||||
KeyCode::Enter => {
|
||||
// TODO: Actually add paper to included list
|
||||
self.add_seed_paper(&self.seeding_input.clone()).await;
|
||||
self.seeding_input.clear();
|
||||
}
|
||||
KeyCode::Char(to_insert) => self.seeding_input.push(to_insert),
|
||||
@ -136,6 +155,53 @@ impl App {
|
||||
_ => {}
|
||||
},
|
||||
ActiveTab::Snowballing => match key {
|
||||
KeyCode::Char(' ') => {
|
||||
if self.pending_publications.len() > 0 {
|
||||
warn!(
|
||||
"The next snowballing step can only be initiated \
|
||||
after screening all pending publications"
|
||||
);
|
||||
// TODO: Show warning/error somehow
|
||||
return;
|
||||
}
|
||||
|
||||
match self.snowballing_step {
|
||||
SnowballingStep::Forward => {
|
||||
// TODO: Implement
|
||||
}
|
||||
SnowballingStep::Backward => {
|
||||
for publication in &self.included_publications {
|
||||
for reference in &publication.referenced_works {
|
||||
let api_link = format!(
|
||||
"https://api.openalex.org/{}",
|
||||
&reference[21..]
|
||||
);
|
||||
// https://openalex.org/W2085881930
|
||||
let publ = get_publication_by_id(
|
||||
&api_link,
|
||||
"an.tsouchlos@gmail.com",
|
||||
)
|
||||
.await;
|
||||
|
||||
match publ {
|
||||
Ok(publ) => {
|
||||
self.pending_publications.push(publ)
|
||||
}
|
||||
// TODO: Show error somehow
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"Failed to get publication\
|
||||
metadata using OpenAlex API: \
|
||||
{}",
|
||||
err
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
KeyCode::Enter => match self.active_pane {
|
||||
ActivePane::IncludedPublications => {
|
||||
if let Some(idx) = self.included_list_state.selected() {
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
use std::{error::Error, io, time::Duration};
|
||||
|
||||
use log::{warn, error};
|
||||
use ratatui::{
|
||||
Terminal,
|
||||
backend::{Backend, CrosstermBackend},
|
||||
@ -15,7 +16,7 @@ use ratatui::{
|
||||
|
||||
use crate::{app::App, ui};
|
||||
|
||||
pub fn run(app: App) -> Result<App, Box<dyn Error>> {
|
||||
pub async fn run(app: App) -> Result<App, Box<dyn Error>> {
|
||||
// setup terminal
|
||||
enable_raw_mode()?;
|
||||
let mut stdout = io::stdout();
|
||||
@ -24,7 +25,7 @@ pub fn run(app: App) -> Result<App, Box<dyn Error>> {
|
||||
let mut terminal = Terminal::new(backend)?;
|
||||
|
||||
// create app and run it
|
||||
let app_result = run_app(&mut terminal, app);
|
||||
let app_result = run_app(&mut terminal, app).await;
|
||||
|
||||
// restore terminal
|
||||
disable_raw_mode()?;
|
||||
@ -36,13 +37,14 @@ pub fn run(app: App) -> Result<App, Box<dyn Error>> {
|
||||
terminal.show_cursor()?;
|
||||
|
||||
if let Err(err) = &app_result {
|
||||
error!("{err:?}");
|
||||
println!("{err:?}");
|
||||
}
|
||||
|
||||
Ok(app_result?)
|
||||
}
|
||||
|
||||
fn run_app<B: Backend>(
|
||||
async fn run_app<B: Backend>(
|
||||
terminal: &mut Terminal<B>,
|
||||
mut app: App,
|
||||
) -> io::Result<App>
|
||||
@ -52,11 +54,11 @@ where
|
||||
loop {
|
||||
terminal.draw(|frame| ui::draw(frame, &mut app))?;
|
||||
|
||||
if event::poll(Duration::from_millis(10))? {
|
||||
if let Event::Key(key) = event::read()? {
|
||||
app.handle_key(key.code);
|
||||
}
|
||||
// if event::poll(Duration::from_millis(10))? {
|
||||
if let Event::Key(key) = event::read()? {
|
||||
app.handle_key(key.code).await;
|
||||
}
|
||||
// }
|
||||
|
||||
if app.should_quit {
|
||||
return Ok(app);
|
||||
|
||||
26
src/main.rs
26
src/main.rs
@ -54,7 +54,7 @@ fn serialize_savefile(
|
||||
|
||||
use clap::Parser;
|
||||
mod crossterm;
|
||||
use std::error::Error;
|
||||
use std::{env, error::Error, fs::OpenOptions};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "Brittling")]
|
||||
@ -62,13 +62,33 @@ use std::error::Error;
|
||||
struct Args {
|
||||
#[arg(short, long)]
|
||||
savefile: String,
|
||||
|
||||
#[arg(short, long, default_value = "/tmp/snowballing.log")]
|
||||
logfile: String,
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn Error>> {
|
||||
let args = Args::parse();
|
||||
|
||||
if env::var("RUST_LOG").is_err() {
|
||||
unsafe { env::set_var("RUST_LOG", "info") }
|
||||
}
|
||||
|
||||
env_logger::Builder::from_default_env()
|
||||
.format_module_path(false)
|
||||
.target(env_logger::Target::Pipe(Box::new(
|
||||
OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(args.logfile)
|
||||
.unwrap(),
|
||||
)))
|
||||
.init();
|
||||
|
||||
let starting_app_state = deserialize_savefile(&args.savefile)?;
|
||||
|
||||
let final_app_state = crate::crossterm::run(starting_app_state)?;
|
||||
let final_app_state = crate::crossterm::run(starting_app_state).await?;
|
||||
|
||||
serialize_savefile(&final_app_state, &args.savefile)?;
|
||||
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
use ammonia::Builder;
|
||||
use html_escape::decode_html_entities;
|
||||
use reqwest::Error;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{collections::HashMap};
|
||||
use std::collections::HashMap;
|
||||
use unicode_general_category::{GeneralCategory, get_general_category};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct Authorship {
|
||||
@ -55,15 +57,31 @@ impl Publication {
|
||||
|
||||
words_with_pos.sort_by_key(|k| k.0);
|
||||
|
||||
let unsanitized = words_with_pos
|
||||
let raw_text = words_with_pos
|
||||
.into_iter()
|
||||
.map(|(_, word)| word.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
let cleaner = Builder::empty();
|
||||
let sanitized = cleaner.clean(&unsanitized).to_string();
|
||||
sanitized.replace("\u{a0}", " ").trim().to_string()
|
||||
let cleaner = Builder::empty().clean(&raw_text).to_string();
|
||||
let decoded = decode_html_entities(&cleaner);
|
||||
|
||||
let cleaned: String = decoded
|
||||
.chars()
|
||||
.filter(|&c| {
|
||||
let cat = get_general_category(c);
|
||||
!matches!(
|
||||
cat,
|
||||
GeneralCategory::Control
|
||||
| GeneralCategory::Format
|
||||
| GeneralCategory::Surrogate
|
||||
| GeneralCategory::PrivateUse
|
||||
| GeneralCategory::Unassigned
|
||||
) || c.is_whitespace()
|
||||
})
|
||||
.collect();
|
||||
|
||||
cleaned.split_whitespace().collect::<Vec<_>>().join(" ")
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -73,6 +91,24 @@ pub struct OpenAlexResponse {
|
||||
pub results: Vec<Publication>,
|
||||
}
|
||||
|
||||
pub async fn get_publication_by_id(
|
||||
api_link: &str,
|
||||
email: &str,
|
||||
) -> Result<Publication, Error> {
|
||||
let url = format!("{}&mailto={}", api_link, email);
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
let response = client
|
||||
.get(url)
|
||||
.header("User-Agent", "Rust-OpenAlex-Client/1.0")
|
||||
.send()
|
||||
.await?
|
||||
.json::<Publication>()
|
||||
.await?;
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
// TODO: Get all papers, not just the first page
|
||||
pub async fn get_citing_papers(
|
||||
target_id: &str,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user