use ammonia::Builder; use html_escape::decode_html_entities; use reqwest::Error; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use unicode_general_category::{GeneralCategory, get_general_category}; #[derive(Serialize, Deserialize, Debug, Clone)] pub struct Authorship { pub author_position: String, pub raw_author_name: String, } // TODO: Handle duplicates by having vectors of ids #[derive(Serialize, Deserialize, Debug, Clone)] pub struct Publication { pub id: String, pub display_name: Option, pub authorships: Vec, pub publication_year: Option, pub abstract_inverted_index: Option>>, pub referenced_works: Vec, } #[derive(Serialize, Deserialize, Default)] pub enum SnowballingStep { #[default] Backward, Forward, } impl ToString for SnowballingStep { fn to_string(&self) -> String { match self { SnowballingStep::Forward => String::from("forward"), SnowballingStep::Backward => String::from("backward"), } } } // TODO: Only store IDs of excluded publications? #[derive(Serialize, Deserialize, Default)] pub struct SnowballingIteration { pub included_publications: Vec, pub excluded_publications: Vec, pub step: SnowballingStep, } #[derive(Serialize, Deserialize, Default)] pub struct SnowballingHistory { pub seed: Vec, pub current_iteration: SnowballingIteration, pub previoius_iterations: Vec, pub pending_publications: Vec, } impl SnowballingHistory { pub fn get_all_included(&self) -> Vec { vec![self.current_iteration.included_publications.clone()] .into_iter() .chain( self.previoius_iterations .iter() .map(|iter| iter.included_publications.clone()), ) .flatten() .collect() } pub fn get_all_pending(&self) -> Vec { self.pending_publications.clone() } } impl Publication { pub fn get_title(&self) -> Option { self.display_name.clone() } pub fn get_year(&self) -> Option { self.publication_year } pub fn get_author_text(&self) -> String { let mut author_str = self .authorships .first() .map(|authorship| authorship.raw_author_name.clone()) .expect("Papers are required to always have at least one author"); if self.authorships.len() > 1 { author_str.push_str(" et al."); } author_str } pub fn get_abstract(&self) -> Option { self.abstract_inverted_index.clone().map(|content| { let mut words_with_pos: Vec<(u32, &String)> = Vec::new(); for (word, positions) in &content { for pos in positions { words_with_pos.push((*pos, word)); } } words_with_pos.sort_by_key(|k| k.0); let raw_text = words_with_pos .into_iter() .map(|(_, word)| word.as_str()) .collect::>() .join(" "); let cleaner = Builder::empty().clean(&raw_text).to_string(); let decoded = decode_html_entities(&cleaner); let cleaned: String = decoded .chars() .filter(|&c| { let cat = get_general_category(c); !matches!( cat, GeneralCategory::Control | GeneralCategory::Format | GeneralCategory::Surrogate | GeneralCategory::PrivateUse | GeneralCategory::Unassigned ) || c.is_whitespace() }) .collect(); cleaned.split_whitespace().collect::>().join(" ") }) } } // #[derive(Serialize, Deserialize, Debug)] // pub struct OpenAlexResponse { // pub results: Vec, // } pub async fn get_publication_by_id( api_link: &str, email: &str, ) -> Result { let url = format!("{}&mailto={}", api_link, email); let client = reqwest::Client::new(); let response = client .get(url) .header("User-Agent", "Rust-OpenAlex-Client/1.0") .send() .await? .json::() .await?; Ok(response) } // // TODO: Get all papers, not just the first page // pub async fn get_citing_papers( // target_id: &str, // email: &str, // ) -> Result, Error> { // let url = format!( // "https://api.openalex.org/works?filter=cites:{}&mailto={}", // target_id, email // ); // // let client = reqwest::Client::new(); // let response = client // .get(url) // .header("User-Agent", "Rust-OpenAlex-Client/1.0") // .send() // .await? // .json::() // .await?; // // Ok(response.results) // }