use ammonia::Builder; use reqwest::Error; use serde::{Deserialize, Serialize}; use std::{collections::HashMap}; #[derive(Serialize, Deserialize, Debug)] pub struct Authorship { pub author_position: String, pub raw_author_name: String, } // TODO: Handle duplicates by having vectors of ids #[derive(Serialize, Deserialize, Debug)] pub struct Publication { pub id: String, pub display_name: Option, pub authorships: Vec, pub publication_year: Option, pub abstract_inverted_index: Option>>, pub referenced_works: Vec, } impl Publication { pub fn get_title(&self) -> Option { self.display_name.clone() } pub fn get_year(&self) -> Option { self.publication_year } pub fn get_author_text(&self) -> String { let mut author_str = self .authorships .first() .map(|authorship| authorship.raw_author_name.clone()) .expect("Papers are required to always have at least one author"); if self.authorships.len() > 1 { author_str.push_str(" et al."); } author_str } pub fn get_abstract(&self) -> Option { self.abstract_inverted_index.clone().map(|content| { let mut words_with_pos: Vec<(u32, &String)> = Vec::new(); for (word, positions) in &content { for pos in positions { words_with_pos.push((*pos, word)); } } words_with_pos.sort_by_key(|k| k.0); let unsanitized = words_with_pos .into_iter() .map(|(_, word)| word.as_str()) .collect::>() .join(" "); let cleaner = Builder::empty(); let sanitized = cleaner.clean(&unsanitized).to_string(); sanitized.replace("\u{a0}", " ").trim().to_string() }) } } #[derive(Serialize, Deserialize, Debug)] pub struct OpenAlexResponse { pub results: Vec, } // TODO: Get all papers, not just the first page pub async fn get_citing_papers( target_id: &str, email: &str, ) -> Result, Error> { let url = format!( "https://api.openalex.org/works?filter=cites:{}&mailto={}", target_id, email ); let client = reqwest::Client::new(); let response = client .get(url) .header("User-Agent", "Rust-OpenAlex-Client/1.0") .send() .await? .json::() .await?; Ok(response.results) }