Start implementing snowballing functionality; add logging; properly sanitize abstracts
This commit is contained in:
78
src/app.rs
78
src/app.rs
@@ -1,7 +1,11 @@
|
||||
use core::panic;
|
||||
|
||||
use ratatui::{crossterm::event::KeyCode, widgets::ListState};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::snowballing::Publication;
|
||||
use crate::snowballing::{Publication, get_publication_by_id};
|
||||
|
||||
use log::{debug, info, warn, error};
|
||||
|
||||
#[derive(Serialize, Deserialize, Default, PartialEq)]
|
||||
pub enum ActivePane {
|
||||
@@ -20,8 +24,8 @@ pub enum ActiveTab {
|
||||
#[derive(Serialize, Deserialize, Default)]
|
||||
pub enum SnowballingStep {
|
||||
#[default]
|
||||
Forward,
|
||||
Backward,
|
||||
Forward,
|
||||
}
|
||||
|
||||
impl ToString for SnowballingStep {
|
||||
@@ -104,14 +108,29 @@ pub struct App {
|
||||
pub should_quit: bool,
|
||||
}
|
||||
|
||||
// TODO: Implement exclusion and inclusion of papers (e.g., X and Y chars)
|
||||
// TODO: Implement moving through steps and iterations (populating pending papers)
|
||||
// TODO: Implement input of seed papers using IDs
|
||||
// TODO: Implement exclusion and inclusion of papers (e.g., X and Y chars)
|
||||
// TODO: Implement possibility of pushing excluded papers back into pending
|
||||
// TODO: Implement export of included papers as csv for keywording with a spreadsheet
|
||||
// TODO: Implement export of included papers into zotero (Use RIS format somehow)
|
||||
impl App {
|
||||
pub fn handle_key(&mut self, key: KeyCode) {
|
||||
// TODO: Show error somehow
|
||||
pub async fn add_seed_paper(&mut self, api_link: &String) {
|
||||
let publ =
|
||||
get_publication_by_id(api_link, "an.tsouchlos@gmail.com").await;
|
||||
|
||||
match publ {
|
||||
Ok(publ) => self.included_publications.push(publ),
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"Failed to get publication metadata using OpenAlex API: {}",
|
||||
err
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn handle_key(&mut self, key: KeyCode) {
|
||||
if KeyCode::Esc == key {
|
||||
self.should_quit = true;
|
||||
return;
|
||||
@@ -123,7 +142,7 @@ impl App {
|
||||
self.active_tab = ActiveTab::Snowballing;
|
||||
}
|
||||
KeyCode::Enter => {
|
||||
// TODO: Actually add paper to included list
|
||||
self.add_seed_paper(&self.seeding_input.clone()).await;
|
||||
self.seeding_input.clear();
|
||||
}
|
||||
KeyCode::Char(to_insert) => self.seeding_input.push(to_insert),
|
||||
@@ -136,6 +155,53 @@ impl App {
|
||||
_ => {}
|
||||
},
|
||||
ActiveTab::Snowballing => match key {
|
||||
KeyCode::Char(' ') => {
|
||||
if self.pending_publications.len() > 0 {
|
||||
warn!(
|
||||
"The next snowballing step can only be initiated \
|
||||
after screening all pending publications"
|
||||
);
|
||||
// TODO: Show warning/error somehow
|
||||
return;
|
||||
}
|
||||
|
||||
match self.snowballing_step {
|
||||
SnowballingStep::Forward => {
|
||||
// TODO: Implement
|
||||
}
|
||||
SnowballingStep::Backward => {
|
||||
for publication in &self.included_publications {
|
||||
for reference in &publication.referenced_works {
|
||||
let api_link = format!(
|
||||
"https://api.openalex.org/{}",
|
||||
&reference[21..]
|
||||
);
|
||||
// https://openalex.org/W2085881930
|
||||
let publ = get_publication_by_id(
|
||||
&api_link,
|
||||
"an.tsouchlos@gmail.com",
|
||||
)
|
||||
.await;
|
||||
|
||||
match publ {
|
||||
Ok(publ) => {
|
||||
self.pending_publications.push(publ)
|
||||
}
|
||||
// TODO: Show error somehow
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"Failed to get publication\
|
||||
metadata using OpenAlex API: \
|
||||
{}",
|
||||
err
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
KeyCode::Enter => match self.active_pane {
|
||||
ActivePane::IncludedPublications => {
|
||||
if let Some(idx) = self.included_list_state.selected() {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::{error::Error, io, time::Duration};
|
||||
|
||||
use log::{warn, error};
|
||||
use ratatui::{
|
||||
Terminal,
|
||||
backend::{Backend, CrosstermBackend},
|
||||
@@ -15,7 +16,7 @@ use ratatui::{
|
||||
|
||||
use crate::{app::App, ui};
|
||||
|
||||
pub fn run(app: App) -> Result<App, Box<dyn Error>> {
|
||||
pub async fn run(app: App) -> Result<App, Box<dyn Error>> {
|
||||
// setup terminal
|
||||
enable_raw_mode()?;
|
||||
let mut stdout = io::stdout();
|
||||
@@ -24,7 +25,7 @@ pub fn run(app: App) -> Result<App, Box<dyn Error>> {
|
||||
let mut terminal = Terminal::new(backend)?;
|
||||
|
||||
// create app and run it
|
||||
let app_result = run_app(&mut terminal, app);
|
||||
let app_result = run_app(&mut terminal, app).await;
|
||||
|
||||
// restore terminal
|
||||
disable_raw_mode()?;
|
||||
@@ -36,13 +37,14 @@ pub fn run(app: App) -> Result<App, Box<dyn Error>> {
|
||||
terminal.show_cursor()?;
|
||||
|
||||
if let Err(err) = &app_result {
|
||||
error!("{err:?}");
|
||||
println!("{err:?}");
|
||||
}
|
||||
|
||||
Ok(app_result?)
|
||||
}
|
||||
|
||||
fn run_app<B: Backend>(
|
||||
async fn run_app<B: Backend>(
|
||||
terminal: &mut Terminal<B>,
|
||||
mut app: App,
|
||||
) -> io::Result<App>
|
||||
@@ -52,11 +54,11 @@ where
|
||||
loop {
|
||||
terminal.draw(|frame| ui::draw(frame, &mut app))?;
|
||||
|
||||
if event::poll(Duration::from_millis(10))? {
|
||||
if let Event::Key(key) = event::read()? {
|
||||
app.handle_key(key.code);
|
||||
}
|
||||
// if event::poll(Duration::from_millis(10))? {
|
||||
if let Event::Key(key) = event::read()? {
|
||||
app.handle_key(key.code).await;
|
||||
}
|
||||
// }
|
||||
|
||||
if app.should_quit {
|
||||
return Ok(app);
|
||||
|
||||
26
src/main.rs
26
src/main.rs
@@ -54,7 +54,7 @@ fn serialize_savefile(
|
||||
|
||||
use clap::Parser;
|
||||
mod crossterm;
|
||||
use std::error::Error;
|
||||
use std::{env, error::Error, fs::OpenOptions};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "Brittling")]
|
||||
@@ -62,13 +62,33 @@ use std::error::Error;
|
||||
struct Args {
|
||||
#[arg(short, long)]
|
||||
savefile: String,
|
||||
|
||||
#[arg(short, long, default_value = "/tmp/snowballing.log")]
|
||||
logfile: String,
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn Error>> {
|
||||
let args = Args::parse();
|
||||
|
||||
if env::var("RUST_LOG").is_err() {
|
||||
unsafe { env::set_var("RUST_LOG", "info") }
|
||||
}
|
||||
|
||||
env_logger::Builder::from_default_env()
|
||||
.format_module_path(false)
|
||||
.target(env_logger::Target::Pipe(Box::new(
|
||||
OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(args.logfile)
|
||||
.unwrap(),
|
||||
)))
|
||||
.init();
|
||||
|
||||
let starting_app_state = deserialize_savefile(&args.savefile)?;
|
||||
|
||||
let final_app_state = crate::crossterm::run(starting_app_state)?;
|
||||
let final_app_state = crate::crossterm::run(starting_app_state).await?;
|
||||
|
||||
serialize_savefile(&final_app_state, &args.savefile)?;
|
||||
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
use ammonia::Builder;
|
||||
use html_escape::decode_html_entities;
|
||||
use reqwest::Error;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{collections::HashMap};
|
||||
use std::collections::HashMap;
|
||||
use unicode_general_category::{GeneralCategory, get_general_category};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct Authorship {
|
||||
@@ -55,15 +57,31 @@ impl Publication {
|
||||
|
||||
words_with_pos.sort_by_key(|k| k.0);
|
||||
|
||||
let unsanitized = words_with_pos
|
||||
let raw_text = words_with_pos
|
||||
.into_iter()
|
||||
.map(|(_, word)| word.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
let cleaner = Builder::empty();
|
||||
let sanitized = cleaner.clean(&unsanitized).to_string();
|
||||
sanitized.replace("\u{a0}", " ").trim().to_string()
|
||||
let cleaner = Builder::empty().clean(&raw_text).to_string();
|
||||
let decoded = decode_html_entities(&cleaner);
|
||||
|
||||
let cleaned: String = decoded
|
||||
.chars()
|
||||
.filter(|&c| {
|
||||
let cat = get_general_category(c);
|
||||
!matches!(
|
||||
cat,
|
||||
GeneralCategory::Control
|
||||
| GeneralCategory::Format
|
||||
| GeneralCategory::Surrogate
|
||||
| GeneralCategory::PrivateUse
|
||||
| GeneralCategory::Unassigned
|
||||
) || c.is_whitespace()
|
||||
})
|
||||
.collect();
|
||||
|
||||
cleaned.split_whitespace().collect::<Vec<_>>().join(" ")
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -73,6 +91,24 @@ pub struct OpenAlexResponse {
|
||||
pub results: Vec<Publication>,
|
||||
}
|
||||
|
||||
pub async fn get_publication_by_id(
|
||||
api_link: &str,
|
||||
email: &str,
|
||||
) -> Result<Publication, Error> {
|
||||
let url = format!("{}&mailto={}", api_link, email);
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
let response = client
|
||||
.get(url)
|
||||
.header("User-Agent", "Rust-OpenAlex-Client/1.0")
|
||||
.send()
|
||||
.await?
|
||||
.json::<Publication>()
|
||||
.await?;
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
// TODO: Get all papers, not just the first page
|
||||
pub async fn get_citing_papers(
|
||||
target_id: &str,
|
||||
|
||||
Reference in New Issue
Block a user