Source code for scipost.services

__copyright__ = "Copyright © Stichting SciPost (SciPost Foundation)"
__license__ = "AGPL v3"


# Module for making external api calls as needed in the submissions cycle
import feedparser
import requests
import datetime
import dateutil.parser
import logging

from submissions.constants import FIGSHARE_PREPRINT_SERVERS
from submissions.models import PreprintServer

arxiv_logger = logging.getLogger("scipost.services.arxiv")
doi_logger = logging.getLogger("scipost.services.doi")
figshare_logger = logging.getLogger("scipost.services.figshare")
osfpreprints_logger = logging.getLogger("scipost.services.osfpreprints")


def extract_publication_date_from_Crossref_data(data):
    date_parts = data.get("issued", {}).get("date-parts", {})
    if date_parts:
        date_parts = date_parts[0]
        year = date_parts[0]
        month = date_parts[1] if len(date_parts) > 1 else 1
        day = date_parts[2] if len(date_parts) > 2 else 1
        pub_date = datetime.date(year, month, day).isoformat()
    else:
        pub_date = ""
    return pub_date
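
# Example (illustrative, not part of the module): a minimal Crossref-style
# "issued" field; a missing month or day defaults to 1.
#
#     extract_publication_date_from_Crossref_data({"issued": {"date-parts": [[2021, 7]]}})
#     # -> "2021-07-01"
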
class DOICaller:
    def __init__(self, doi_string):
        self.doi_string = doi_string
        doi_logger.info("New DOI call for %s" % doi_string)

        self._call_crosslink()
        if self.is_valid:
            self._format_data()

    def _call_crosslink(self):
        url = "https://api.crossref.org/works/%s" % self.doi_string
        request = requests.get(url)

        doi_logger.info(
            "GET [{doi}] [request] | {url}".format(
                doi=self.doi_string,
                url=url,
            )
        )

        if request.ok:
            self.is_valid = True
            self._crossref_data = request.json()["message"]
        else:
            self.is_valid = False

        doi_logger.info(
            "GET [{doi}] [response {valid}] | {response}".format(
                doi=self.doi_string,
                valid="VALID" if self.is_valid else "INVALID",
                response=request.text,
            )
        )

    def _format_data(self):
        data = self._crossref_data
        title = data.get("title", [])[0]
        # author_list is given as a comma separated list of names on the relevant models
        author_list = []
        for author in data.get("author", []):
            try:
                author_list.append("{} {}".format(author["given"], author["family"]))
            except KeyError:
                author_list.append(author["name"])
        author_list = ", ".join(author_list)
        abstract = data.get("abstract", "")
        journal = (
            data.get("container-title", [])[0]
            if len(data.get("container-title", [])) > 0
            else ""
        )
        volume = data.get("volume", "")
        pages = self._get_pages(data)
        pub_date = extract_publication_date_from_Crossref_data(data)

        self.data = {
            "title": title,
            "author_list": author_list,
            "abstract": abstract,
            "journal": journal,
            "volume": volume,
            "pages": pages,
            "pub_date": pub_date,
            "crossref_data": self._crossref_data,
        }
        doi_logger.info(
            "GET [{doi}] [formatted data] | {data}".format(
                doi=self.doi_string,
                data=self.data,
            )
        )

    def _get_pages(self, data):
        # For Physical Review
        pages = data.get("article-number", "")
        # For other journals?
        if not pages:
            pages = data.get("page", "")
        return pages
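
# Usage sketch (illustrative, not part of the module): assumes network access
# and a DOI known to Crossref; the DOI below is only an example.
#
#     caller = DOICaller("10.21468/SciPostPhys.1.1.001")
#     if caller.is_valid:
#         print(caller.data["title"], caller.data["journal"], caller.data["pub_date"])
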
class ArxivCaller:
    """ArXiv Caller will help retrieve Submission data from arXiv API."""

    query_base_url = "https://export.arxiv.org/api/query?id_list=%s"

    def __init__(self, identifier):
        self.identifier = identifier
        arxiv_logger.info("New ArXiv call for identifier %s" % identifier)

        self._call_arxiv()
        if self.is_valid:
            self._format_data()

    def _call_arxiv(self):
        url = self.query_base_url % self.identifier
        request = requests.get(url)
        response_content = feedparser.parse(request.content)
        arxiv_logger.info(
            "GET [{arxiv}] [request] | {url}".format(
                arxiv=self.identifier,
                url=url,
            )
        )
        if self._search_result_present(response_content):
            arxiv_data = response_content["entries"][0]
            self.is_valid = True
            self._arxiv_data = arxiv_data
            self.metadata = response_content
        else:
            self.is_valid = False

        arxiv_logger.info(
            "GET [{arxiv}] [response {valid}] | {response}".format(
                arxiv=self.identifier,
                valid="VALID" if self.is_valid else "INVALID",
                response=response_content,
            )
        )

    def _format_data(self):
        data = self._arxiv_data
        title = data["title"]
        author_list = [author["name"] for author in data.get("authors", [])]
        # author_list is given as a comma separated list of names on the relevant models (Commentary, Submission)
        author_list = ", ".join(author_list)
        arxiv_link = data["id"].replace("http:", "https:")
        abstract = data["summary"]
        pub_date = dateutil.parser.parse(data["published"]).date()

        self.data = {
            "title": title,
            "author_list": author_list,
            "pub_abstract": abstract,
            "abstract": abstract,  # Duplicate for Commentary/Submission cross-compatibility
            "pub_date": pub_date,
            "arxiv_link": arxiv_link,  # Duplicate for Commentary
            "preprint_server": PreprintServer.objects.get(name="arXiv"),
            "preprint_link": arxiv_link,
        }
        arxiv_logger.info(
            "GET [{arxiv}] [formatted data] | {data}".format(
                arxiv=self.identifier,
                data=self.data,
            )
        )

    def _search_result_present(self, data):
        if len(data.get("entries", [])) > 0:
            return "title" in data["entries"][0]
        return False
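
# Usage sketch (illustrative, not part of the module): assumes network access,
# a PreprintServer row named "arXiv" in the database, and a valid arXiv
# identifier with version number (the identifier below is made up).
#
#     caller = ArxivCaller("2001.12345v1")
#     if caller.is_valid:
#         print(caller.data["title"], caller.data["preprint_link"])
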
class FigshareCaller:
    """
    Figshare caller to get data from api.figshare.com.
    """

    query_base_url = "https://api.figshare.com/v2/articles/%s/versions/%s"

    def __init__(self, preprint_server, identifier_w_vn_nr):
        self.preprint_server = preprint_server
        self.identifier_w_vn_nr = identifier_w_vn_nr
        self.identifier = identifier_w_vn_nr.split(".")[0]
        self.version = identifier_w_vn_nr.split(".v")[1]
        figshare_logger.info(
            "New figshare API call for identifier %s.v%s"
            % (self.identifier, self.version)
        )

        self._call_figshare()
        if self.is_valid:
            self._format_data()

    def _call_figshare(self):
        url = self.query_base_url % (self.identifier, self.version)
        request = requests.get(url)
        response_content = request.json()
        figshare_logger.info(
            "GET [{identifier_w_vn_nr}] [request] | {url}".format(
                identifier_w_vn_nr=self.identifier_w_vn_nr,
                url=url,
            )
        )
        if self._result_present(response_content):
            self.is_valid = True
            self._figshare_data = response_content
            self.metadata = response_content
        else:
            self.is_valid = False

        figshare_logger.info(
            "GET [{identifier}] [response {valid}] | {response}".format(
                identifier=self.identifier,
                valid="VALID" if self.is_valid else "INVALID",
                response=response_content,
            )
        )

    def _format_data(self):
        """Format data to prefill SubmissionForm as much as possible."""
        title = self._figshare_data["title"]
        author_list = [
            author["full_name"] for author in self._figshare_data.get("authors", [])
        ]
        # author_list is given as a comma separated list of names on the relevant models (Commentary, Submission)
        author_list = ", ".join(author_list)
        abstract = self._figshare_data["description"]
        pub_date = self._figshare_data["published_date"]
        figshare_doi = self._figshare_data["doi"]
        identifier_w_vn_nr = (
            self.preprint_server.name.lower() + "_" + self.identifier_w_vn_nr
        )

        self.data = {
            "title": title,
            "author_list": author_list,
            "abstract": abstract,
            "pub_date": pub_date,
            "preprint_server": self.preprint_server,
            "preprint_link": "https://doi.org/" + figshare_doi,
            "identifier_w_vn_nr": identifier_w_vn_nr,
        }

    def _result_present(self, data):
        try:
            return data["id"] == int(self.identifier)
        except KeyError:
            pass
        return False
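
# Usage sketch (illustrative, not part of the module): identifier_w_vn_nr is the
# figshare article id plus version, e.g. "12345678.v1" (made-up values), and
# preprint_server is the PreprintServer instance of the figshare-hosted server.
#
#     caller = FigshareCaller(preprint_server, "12345678.v1")
#     if caller.is_valid:
#         print(caller.data["preprint_link"], caller.data["identifier_w_vn_nr"])
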
class OSFPreprintsCaller:
    """
    OSFPreprints caller to get data from api.osf.io.
    """

    query_base_url = "https://api.osf.io/v2/preprints/%s/?embed=contributors"

    def __init__(self, preprint_server, identifier):
        self.preprint_server = preprint_server
        self.identifier = identifier
        osfpreprints_logger.info(
            "New osfpreprints API call for identifier %s" % self.identifier
        )

        self._call_osfpreprints()
        if self.is_valid:
            self._format_data()

    def _call_osfpreprints(self):
        url = self.query_base_url % self.identifier
        request = requests.get(url)
        response_content = request.json()
        osfpreprints_logger.info(
            "GET [{identifier}] [request] | {url}".format(
                identifier=self.identifier,
                url=url,
            )
        )
        if self._result_present(response_content):
            self.is_valid = True
            self._osfpreprints_data = response_content["data"]
            self.metadata = response_content["data"]
        else:
            self.is_valid = False

        osfpreprints_logger.info(
            "GET [{identifier}] [response {valid}] | {response}".format(
                identifier=self.identifier,
                valid="VALID" if self.is_valid else "INVALID",
                response=response_content,
            )
        )

    def _format_data(self):
        """Format data to prefill SubmissionForm as much as possible."""
        title = self._osfpreprints_data["attributes"]["title"]
        contributors_data = self._osfpreprints_data["embeds"]["contributors"]["data"]
        author_list = [
            d["embeds"]["users"]["data"]["attributes"]["full_name"]
            for d in contributors_data
        ]
        # author_list is given as a comma separated list of names on the relevant models (Commentary, Submission)
        author_list = ", ".join(author_list)
        abstract = self._osfpreprints_data["attributes"]["description"]
        pub_date = self._osfpreprints_data["attributes"]["date_published"]
        osfpreprints_doi = self._osfpreprints_data["links"]["preprint_doi"]
        identifier_w_vn_nr = self.preprint_server.name.lower() + "_" + self.identifier

        self.data = {
            "title": title,
            "author_list": author_list,
            "abstract": abstract,
            "pub_date": pub_date,
            "preprint_server": self.preprint_server,
            "preprint_link": osfpreprints_doi,
            "identifier_w_vn_nr": identifier_w_vn_nr,
        }

    def _result_present(self, response_content):
        try:
            return response_content["data"]["id"] == self.identifier
        except KeyError:
            pass
        return False
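
# Usage sketch (illustrative, not part of the module): assumes network access and
# an OSF preprint id (the id below is made up); preprint_server is the matching
# PreprintServer instance for the OSF-hosted server.
#
#     caller = OSFPreprintsCaller(preprint_server, "abc12")
#     if caller.is_valid:
#         print(caller.data["title"], caller.data["preprint_link"])
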