use ammonia::Builder;
use html_escape::decode_html_entities;
use reqwest::Error;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use unicode_general_category::{GeneralCategory, get_general_category};
/// One author entry on a publication (an element of the OpenAlex
/// `authorships` array).
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Authorship {
/// Position of the author in the author list — NOTE(review): values come
/// straight from the API (e.g. "first"); confirm the full value set.
pub author_position: String,
/// Author name exactly as given by the source record.
pub raw_author_name: String,
}
// TODO: Handle duplicates by having vectors of ids
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Publication {
pub id: String,
pub display_name: Option,
pub authorships: Vec,
pub publication_year: Option,
pub abstract_inverted_index: Option>>,
pub referenced_works: Vec,
}
/// Direction of a snowballing iteration.
#[derive(Serialize, Deserialize, Default)]
pub enum SnowballingStep {
/// Follow the works referenced by included papers (the default direction).
#[default]
Backward,
/// Follow the works that cite included papers.
Forward,
}
/// Formats the step as its lowercase name ("forward" / "backward").
///
/// Implemented as `Display` rather than a direct `ToString` impl (Clippy
/// `to_string_trait_impl`); `to_string()` keeps working via the blanket
/// `impl<T: Display> ToString for T`, so callers are unaffected.
impl std::fmt::Display for SnowballingStep {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            SnowballingStep::Forward => "forward",
            SnowballingStep::Backward => "backward",
        };
        f.write_str(name)
    }
}
// TODO: Only store IDs of excluded publications?
#[derive(Serialize, Deserialize, Default)]
pub struct SnowballingIteration {
pub included_publications: Vec,
pub excluded_publications: Vec,
pub step: SnowballingStep,
}
#[derive(Serialize, Deserialize, Default)]
pub struct SnowballingHistory {
pub seed: Vec,
pub current_iteration: SnowballingIteration,
pub previoius_iterations: Vec,
pub pending_publications: Vec,
}
impl SnowballingHistory {
pub fn get_all_included(&self) -> Vec {
vec![self.current_iteration.included_publications.clone()]
.into_iter()
.chain(
self.previoius_iterations
.iter()
.map(|iter| iter.included_publications.clone()),
)
.flatten()
.collect()
}
pub fn get_all_pending(&self) -> Vec {
self.pending_publications.clone()
}
}
impl Publication {
pub fn get_title(&self) -> Option {
self.display_name.clone()
}
pub fn get_year(&self) -> Option {
self.publication_year
}
pub fn get_author_text(&self) -> String {
let mut author_str = self
.authorships
.first()
.map(|authorship| authorship.raw_author_name.clone())
.expect("Papers are required to always have at least one author");
if self.authorships.len() > 1 {
author_str.push_str(" et al.");
}
author_str
}
pub fn get_abstract(&self) -> Option {
self.abstract_inverted_index.clone().map(|content| {
let mut words_with_pos: Vec<(u32, &String)> = Vec::new();
for (word, positions) in &content {
for pos in positions {
words_with_pos.push((*pos, word));
}
}
words_with_pos.sort_by_key(|k| k.0);
let raw_text = words_with_pos
.into_iter()
.map(|(_, word)| word.as_str())
.collect::>()
.join(" ");
let cleaner = Builder::empty().clean(&raw_text).to_string();
let decoded = decode_html_entities(&cleaner);
let cleaned: String = decoded
.chars()
.filter(|&c| {
let cat = get_general_category(c);
!matches!(
cat,
GeneralCategory::Control
| GeneralCategory::Format
| GeneralCategory::Surrogate
| GeneralCategory::PrivateUse
| GeneralCategory::Unassigned
) || c.is_whitespace()
})
.collect();
cleaned.split_whitespace().collect::>().join(" ")
})
}
}
// #[derive(Serialize, Deserialize, Debug)]
// pub struct OpenAlexResponse {
// pub results: Vec<Publication>,
// }
pub async fn get_publication_by_id(
api_link: &str,
email: &str,
) -> Result {
let url = format!("{}&mailto={}", api_link, email);
let client = reqwest::Client::new();
let response = client
.get(url)
.header("User-Agent", "Rust-OpenAlex-Client/1.0")
.send()
.await?
.json::()
.await?;
Ok(response)
}
// // TODO: Get all papers, not just the first page
// pub async fn get_citing_papers(
// target_id: &str,
// email: &str,
// ) -> Result<Vec<Publication>, Error> {
// let url = format!(
// "https://api.openalex.org/works?filter=cites:{}&mailto={}",
// target_id, email
// );
//
// let client = reqwest::Client::new();
// let response = client
// .get(url)
// .header("User-Agent", "Rust-OpenAlex-Client/1.0")
// .send()
// .await?
//         .json::<OpenAlexResponse>()
// .await?;
//
// Ok(response.results)
// }