97 lines
2.6 KiB
Rust
97 lines
2.6 KiB
Rust
use ammonia::Builder;
|
|
use reqwest::Error;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::{collections::HashMap};
|
|
|
|
#[derive(Serialize, Deserialize, Debug)]
|
|
pub struct Authorship {
|
|
pub author_position: String,
|
|
pub raw_author_name: String,
|
|
}
|
|
|
|
// TODO: Handle duplicates by having vectors of ids
|
|
#[derive(Serialize, Deserialize, Debug)]
|
|
pub struct Publication {
|
|
pub id: String,
|
|
pub display_name: Option<String>,
|
|
pub authorships: Vec<Authorship>,
|
|
pub publication_year: Option<u32>,
|
|
pub abstract_inverted_index: Option<HashMap<String, Vec<u32>>>,
|
|
pub referenced_works: Vec<String>,
|
|
}
|
|
|
|
impl Publication {
|
|
pub fn get_title(&self) -> Option<String> {
|
|
self.display_name.clone()
|
|
}
|
|
|
|
pub fn get_year(&self) -> Option<u32> {
|
|
self.publication_year
|
|
}
|
|
|
|
pub fn get_author_text(&self) -> String {
|
|
let mut author_str = self
|
|
.authorships
|
|
.first()
|
|
.map(|authorship| authorship.raw_author_name.clone())
|
|
.expect("Papers are required to always have at least one author");
|
|
|
|
if self.authorships.len() > 1 {
|
|
author_str.push_str(" et al.");
|
|
}
|
|
|
|
author_str
|
|
}
|
|
|
|
pub fn get_abstract(&self) -> Option<String> {
|
|
self.abstract_inverted_index.clone().map(|content| {
|
|
let mut words_with_pos: Vec<(u32, &String)> = Vec::new();
|
|
|
|
for (word, positions) in &content {
|
|
for pos in positions {
|
|
words_with_pos.push((*pos, word));
|
|
}
|
|
}
|
|
|
|
words_with_pos.sort_by_key(|k| k.0);
|
|
|
|
let unsanitized = words_with_pos
|
|
.into_iter()
|
|
.map(|(_, word)| word.as_str())
|
|
.collect::<Vec<_>>()
|
|
.join(" ");
|
|
|
|
let cleaner = Builder::empty();
|
|
let sanitized = cleaner.clean(&unsanitized).to_string();
|
|
sanitized.replace("\u{a0}", " ").trim().to_string()
|
|
})
|
|
}
|
|
}
|
|
|
|
#[derive(Serialize, Deserialize, Debug)]
|
|
pub struct OpenAlexResponse {
|
|
pub results: Vec<Publication>,
|
|
}
|
|
|
|
// TODO: Get all papers, not just the first page
|
|
pub async fn get_citing_papers(
|
|
target_id: &str,
|
|
email: &str,
|
|
) -> Result<Vec<Publication>, Error> {
|
|
let url = format!(
|
|
"https://api.openalex.org/works?filter=cites:{}&mailto={}",
|
|
target_id, email
|
|
);
|
|
|
|
let client = reqwest::Client::new();
|
|
let response = client
|
|
.get(url)
|
|
.header("User-Agent", "Rust-OpenAlex-Client/1.0")
|
|
.send()
|
|
.await?
|
|
.json::<OpenAlexResponse>()
|
|
.await?;
|
|
|
|
Ok(response.results)
|
|
}
|