from collections import namedtuple
from datetime import datetime
from hashlib import md5
from typing import List, NamedTuple, Optional, Tuple, Union
from warnings import warn
from pybliometrics.scopus.superclasses import Retrieval
from pybliometrics.scopus.utils import chained_get, check_parameter_value
# [docs]  (appears to be a leftover link marker from a rendered documentation page; not code)
class CitationOverview(Retrieval):
    """Interaction with the Citation Overview API.

    One instance covers up to 25 documents.  Most properties return one
    entry per document; list-valued properties that pass through
    `_maybe_return_list` collapse to `None` when no document has a value.
    """

    @property
    def authors(self) -> Optional[List[Optional[List[NamedTuple]]]]:
        """A list of lists of namedtuples storing author information,
        where each namedtuple corresponds to one author and each sub-list to
        one document.  A document without authors is represented by `None`.
        The information in each namedtuple is `(name surname initials id url)`.
        All entries are strings.
        """
        order = 'name surname initials id url'
        auth = namedtuple('Author', order)
        outer = []
        for doc in self._citeInfoMatrix:
            inner = []
            # `or []` also covers an explicit None stored under 'author'
            for author in doc.get('author') or []:
                # Same namespace stripping as applied to the documents
                author = _parse_dict(author)
                new = auth(name=author.get('index-name'),
                           id=author['authid'],
                           surname=author.get('surname'),
                           initials=author.get('initials'),
                           url=author.get('author-url'))
                inner.append(new)
            outer.append(inner or None)
        return _maybe_return_list(outer)

    @property
    def cc(self) -> Optional[List[List[Tuple[int, int]]]]:
        """List of lists of tuples of yearly number of citations for specified
        years, where each sub-list corresponds to one document.
        Each tuple is `(year, citations)`.
        """
        _years = range(self._start, self._end+1)
        outer = []
        for doc in self._citeInfoMatrix:
            try:
                cites = [int(d['$']) for d in doc['cc']]
            except (AttributeError, KeyError, TypeError):  # No citations
                cites = [0]*len(_years)
            outer.append(list(zip(_years, cites)))
        return _maybe_return_list(outer)

    @property
    def citationType_long(self) -> Optional[List[Optional[str]]]:
        """Type (long version) of the documents (e.g. article, review)."""
        path = ["citationType", "$"]
        out = [chained_get(e, path) for e in self._citeInfoMatrix]
        return _maybe_return_list(out)

    @property
    def citationType_short(self) -> Optional[List[Optional[str]]]:
        """Type (short version) of the documents (e.g. ar, re)."""
        path = ["citationType", "@code"]
        out = [chained_get(e, path) for e in self._citeInfoMatrix]
        return _maybe_return_list(out)

    @property
    def columnTotal(self) -> List[int]:
        """The yearly number of citations for all documents combined,
        one entry per year.
        """
        return [int(d["$"]) for d in self._citeCountHeader["columnTotal"]]

    @property
    def doi(self) -> Optional[List[Optional[str]]]:
        """Document Object Identifier (DOI) of the documents."""
        out = [e.get('doi') for e in self._identifierlegend]
        return _maybe_return_list(out)

    @property
    def endingPage(self) -> Optional[List[Optional[str]]]:
        """Ending pages of the documents."""
        out = [e.get('endingPage') for e in self._citeInfoMatrix]
        return _maybe_return_list(out)

    @property
    def grandTotal(self) -> int:
        """The total number of citations of all documents together."""
        return int(self._citeCountHeader["grandTotal"])

    @property
    def h_index(self) -> int:
        """Combined h-index of citations of all the documents."""
        return int(self._data['h-index'])

    @property
    def issn(self) -> Optional[List[Optional[Union[str, Tuple[str, str]]]]]:
        """ISSN of the publishers of the documents.
        Note: If E-ISSN is known to Scopus, this returns both
        ISSN and E-ISSN in random order separated by blank space.
        """
        out = [e.get('issn') for e in self._citeInfoMatrix]
        return _maybe_return_list(out)

    @property
    def issueIdentifier(self) -> Optional[List[Optional[str]]]:
        """Issue numbers of the documents."""
        out = [e.get('issueIdentifier') for e in self._citeInfoMatrix]
        return _maybe_return_list(out)

    @property
    def laterColumnTotal(self) -> int:
        """The total number of citations for all years after the end
        year for all documents combined.
        """
        return int(self._citeCountHeader["laterColumnTotal"])

    @property
    def lcc(self) -> List[int]:
        """Number of citations after the end year of each document."""
        return [int(m['lcc']) for m in self._citeInfoMatrix]

    @property
    def pcc(self) -> List[int]:
        """Number of citations before the start year of each document."""
        return [int(m['pcc']) for m in self._citeInfoMatrix]

    @property
    def pii(self) -> Optional[List[Optional[str]]]:
        """The Publication Item Identifier (PII) of the documents."""
        out = [e.get('pii') for e in self._identifierlegend]
        return _maybe_return_list(out)

    @property
    def prevColumnTotal(self) -> int:
        """The total number of citations for all years before the start
        year for all documents combined.
        """
        return int(self._citeCountHeader["prevColumnTotal"])

    @property
    def rangeColumnTotal(self) -> int:
        """The total number of citations for all specified years for all
        documents combined.
        """
        return int(self._citeCountHeader["rangeColumnTotal"])

    @property
    def rangeCount(self) -> List[int]:
        """Total citation count over the specified year range for
        each document.
        """
        return [int(e['rangeCount']) for e in self._citeInfoMatrix]

    @property
    def rowTotal(self) -> List[int]:
        """Total number of citations (specified and omitted years) for each
        document.
        """
        return [int(e['rowTotal']) for e in self._citeInfoMatrix]

    @property
    def scopus_id(self) -> List[int]:
        """The Scopus ID(s) of the documents.  Might differ from the
        ones provided.
        """
        return [int(e['scopus_id']) for e in self._identifierlegend]

    @property
    def sortTitle(self) -> Optional[List[Optional[str]]]:
        """Name of source the documents are published in (e.g. the Journal)."""
        out = [e.get('sortTitle') for e in self._citeInfoMatrix]
        return _maybe_return_list(out)

    @property
    def startingPage(self) -> Optional[List[Optional[str]]]:
        """Starting pages of the documents."""
        out = [e.get('startingPage') for e in self._citeInfoMatrix]
        return _maybe_return_list(out)

    @property
    def title(self) -> List[str]:
        """Titles of each document."""
        return [e["title"] for e in self._citeInfoMatrix]

    @property
    def url(self) -> List[str]:
        """URL(s) to Citation Overview API view of each document."""
        return [e["url"] for e in self._citeInfoMatrix]

    @property
    def volume(self) -> Optional[List[Optional[str]]]:
        """Volumes of the documents."""
        out = [e.get('volume') for e in self._citeInfoMatrix]
        return _maybe_return_list(out)

    def __init__(self,
                 identifier: List[Union[int, str]],
                 start: Union[int, str],
                 end: Optional[Union[int, str]] = None,
                 id_type: str = "scopus_id",
                 eid: Optional[str] = None,
                 refresh: Union[bool, int] = False,
                 citation: Optional[str] = None,
                 **kwds: str
                 ) -> None:
        """Interaction with the Citation Overview API.

        :param identifier: Up to 25 identifiers for which to look up
                           citations.  Must be Scopus IDs, DOIs, PIIs or
                           Pubmed IDs.  A single `str` or `int` is
                           deprecated but accepted and treated as a
                           one-element list.
        :param start: The first year for which the citation count should
                      be loaded.
        :param end: The last year for which the citation count should be
                    loaded.  Defaults to the current year (determined when
                    the object is created).
        :param id_type: The type of the IDs provided in `identifier`.  Must be
                        one of `"scopus_id", "doi", "pii", "pubmed_id"`.
        :param eid: (deprecated) The Scopus ID of the abstract - will be
                    removed in a future release: Instead use param
                    `identifier` after stripping the part until the second
                    hyphen.  It is only used when `identifier` is empty, in
                    which case it is converted to a Scopus ID; otherwise
                    `identifier` takes precedence.
        :param refresh: Whether to refresh the cached file if it exists or not.
                        If int is passed, cached file will be refreshed if the
                        number of days since last modification exceeds that value.
        :param citation: Allows for the exclusion of self-citations or those
                         by books.  If `None`, will count all citations.
                         Allowed values: `None, exclude-self, exclude-books`
        :param kwds: Keywords passed on as query parameters.  Must contain
                     fields and values mentioned in the API specification at
                     https://dev.elsevier.com/documentation/AbstractCitationAPI.wadl.

        Raises
        ------
        ValueError
            If parameter `identifier` contains fewer than 1 or more than
            25 elements.
        ValueError
            If any of the parameters `citation`, `id_type` or `refresh` is not
            one of the allowed values.

        Notes
        -----
        The directory for cached results is `{path}/STANDARD/{id}-{citation}`,
        where `path` is specified in your configuration file, and `id` the
        md5-hashed version of a string joining `identifier` on underscore.

        Your API Key needs to be augmented by Elsevier's Scopus
        Integration Team to access this API.
        """
        # Checks
        allowed = ('scopus_id', 'doi', 'pii', 'pubmed_id')
        check_parameter_value(id_type, allowed, "id_type")
        if citation:
            allowed = ('exclude-self', 'exclude-books')
            check_parameter_value(citation, allowed, "citation")
        if eid or not isinstance(identifier, list):
            msg = "Parameter `eid` is deprecated and will be removed in a "\
                  "future release. Instead, provide the corresponding "\
                  "Scopus ID via parameter `identifier` as a list, and set "\
                  "`id_type='scopus_id'`."
            warn(msg, FutureWarning, stacklevel=2)
            if eid and not identifier:
                # Only fall back to the deprecated EID when no identifier
                # was given; keep the part after the second hyphen.
                identifier = [eid.split("-", 2)[-1]]
                id_type = "scopus_id"
            elif isinstance(identifier, (int, str)):
                # A bare scalar would otherwise be iterated character by
                # character (str) or fail on len() (int).
                identifier = [identifier]
        if not 1 <= len(identifier) <= 25:
            msg = "Provide at least 1 and at most 25 identifiers"
            raise ValueError(msg)

        # Variables
        identifier = [str(i) for i in identifier]
        self._start = int(start)
        # Resolve the default here so it is not frozen at import time
        self._end = datetime.now().year if end is None else int(end)
        self._citation = citation
        self._refresh = refresh
        self._view = "STANDARD"

        # Get file content
        date = f'{self._start}-{self._end}'
        kwds[id_type] = identifier
        stem = md5("_".join(identifier).encode('utf8')).hexdigest()
        Retrieval.__init__(self, stem, api='CitationOverview', date=date,
                           citation=citation, **kwds)
        self._data = self._json['abstract-citations-response']

        # citeInfoMatrix
        matrix = self._data['citeInfoMatrix']['citeInfoMatrixXML']['citationMatrix']['citeInfo']
        self._citeInfoMatrix = [_parse_dict(e) for e in matrix]
        # identifier-legend
        legend = self._data['identifier-legend']['identifier']
        self._identifierlegend = [_parse_dict(e) for e in legend]
        # citeCountHeader
        self._citeCountHeader = self._data['citeColumnTotalXML']["citeCountHeader"]

    def __str__(self):
        """Return a summary string."""
        cits_dict = {'exclude-self': 'excluding self-citations',
                     'exclude-books': 'excluding citations from books'}
        date = self.get_cache_file_mdate().split()[0]
        label = cits_dict.get(self._citation)
        # Leading blank keeps the label from running into "count"
        cits_type = f" {label}" if label else ""
        s = f"{len(self.scopus_id)} document(s) has/have the following "\
            f"total citation count{cits_type} as of {date}:\n "\
            f"{'; '.join([str(n) for n in self.rowTotal])}"
        return s
def _parse_dict(dct):
"""Auxiliary function to change the keys of a dictionary."""
return {k.split(":", 1)[-1]: v for k, v in dct.items()}
def _maybe_return_list(lst):
"""Return `lst` unless all of its elements are empty."""
if all(e is None for e in lst):
return None
else:
return lst