Source code for pybliometrics.scopus.abstract_retrieval

from collections import defaultdict, namedtuple
from typing import List, NamedTuple, Optional, Tuple, Union

from pybliometrics.scopus.superclasses import Retrieval
from pybliometrics.scopus.utils import chained_get, check_parameter_value,\
    deduplicate, get_id, detect_id_type, get_link, listify,\
    make_int_if_possible, parse_date_created, VIEWS


[docs] class AbstractRetrieval(Retrieval): @property def abstract(self) -> Optional[str]: """The abstract of a document. Note: If this is empty, try `description` property instead. """ return self._head.get('abstracts') @property def affiliation(self) -> Optional[List[NamedTuple]]: """A list of namedtuples representing listed affiliations in the form `(id, name, city, country)`. """ out = [] aff = namedtuple('Affiliation', 'id name city country') affs = listify(self._json.get('affiliation', [])) for item in affs: new = aff(id=make_int_if_possible(item.get('@id')), name=item.get('affilname'), city=item.get('affiliation-city'), country=item.get('affiliation-country')) out.append(new) return out or None @property def aggregationType(self) -> str: """Aggregation type of source the document is published in.""" return chained_get(self._json, ['coredata', 'prism:aggregationType']) @property def authkeywords(self) -> Optional[List[str]]: """List of author-provided keywords of the document.""" keywords = self._json.get('authkeywords') if not keywords: return None else: try: return [d['$'] for d in keywords['author-keyword']] except TypeError: # Singleton keyword return [keywords['author-keyword']['$']] @property def authorgroup(self) -> Optional[List[NamedTuple]]: """A list of namedtuples representing the article's authors organized by affiliation, in the form `(affiliation_id, dptid, organization, city, postalcode, addresspart, country, collaboration, auid, orcid, indexed_name, surname, given_name)`. If `given_name` is not present, fall back to initials. Note: Affiliation information might be missing or mal-assigned even when it looks correct in the web view. In this case please request a correction. It is generally missing for collaborations. """ # Information can be one of three forms: # 1. A dict with one key (author) or two keys (affiliation and author) # 2. A list of dicts with as in 1, one for each affiliation (incl. missing) # 3. A list of two dicts with one key each (author and collaboration) # Initialization fields = 'affiliation_id dptid organization city postalcode '\ 'addresspart country collaboration auid orcid indexed_name '\ 'surname given_name' auth = namedtuple('Author', fields) items = listify(self._head.get('author-group', [])) index_path = ['preferred-name', 'ce:indexed-name'] # Check for collaboration keys = [k for x in items for k in list(x.keys())] if "collaboration" in keys: collaboration = items.pop(-1)['collaboration'] else: collaboration = {'ce:indexed-name': None} # Iterate through each author-affiliation combination out = [] for item in items: if not item: continue # Affiliation information aff = item.get('affiliation', {}) aff_id = make_int_if_possible(aff.get("@afid")) dep_id = make_int_if_possible(aff.get("@dptid")) org = _get_org(aff) # Author information (might relate to collaborations) authors = listify(item.get('author', item.get('collaboration', []))) for au in authors: try: given = au.get('ce:given-name', au['ce:initials']) except KeyError: # Collaboration given = au.get('ce:text') new = auth(affiliation_id=aff_id, organization=org, city=aff.get('city'), dptid=dep_id, postalcode=aff.get('postal-code'), addresspart=aff.get('address-part'), country=aff.get('country'), collaboration=collaboration.get('ce:indexed-name'), auid=int(au['@auid']), orcid=au.get('@orcid'), surname=au.get('ce:surname'), given_name=given, indexed_name=chained_get(au, index_path)) out.append(new) return out or None @property def authors(self) -> Optional[List[NamedTuple]]: """A list of namedtuples representing the article's authors, in the form `(auid, indexed_name, surname, given_name, affiliation)`. In case multiple affiliation IDs are given, they are joined on `";"`. Note: The affiliation referred to here is what Scopus' algorithm determined as the main affiliation. Property `authorgroup` provides all affiliations. """ out = [] fields = 'auid indexed_name surname given_name affiliation' auth = namedtuple('Author', fields) for item in chained_get(self._json, ['authors', 'author'], []): affs = [a for a in listify(item.get('affiliation')) if a] or None try: aff = ";".join([aff.get('@id') for aff in affs]) except TypeError: aff = None new = auth(auid=int(item['@auid']), surname=item.get('ce:surname'), indexed_name=item.get('ce:indexed-name'), affiliation=aff, given_name=chained_get(item, ['preferred-name', 'ce:given-name'])) out.append(new) return out or None @property def citedby_count(self) -> Optional[int]: """Number of articles citing the document.""" path = ['coredata', 'citedby-count'] return make_int_if_possible(chained_get(self._json, path)) @property def citedby_link(self) -> str: """URL to Scopus page listing citing documents.""" return get_link(self._json, 2) @property def chemicals(self) -> Optional[List[NamedTuple]]: """List of namedtuples representing chemical entities in the form `(source, chemical_name, cas_registry_number)`. In case multiple numbers given, they are joined on `";"`. """ path = ['enhancement', 'chemicalgroup', 'chemicals'] items = listify(chained_get(self._head, path, [])) fields = 'source chemical_name cas_registry_number' chemical = namedtuple('Chemical', fields) out = [] for item in items: for chem in listify(item['chemical']): number = chem.get('cas-registry-number') try: # Multiple numbers given num = ";".join([n['$'] for n in number]) except TypeError: num = number new = chemical(source=item['@source'], cas_registry_number=num, chemical_name=chem['chemical-name']) out.append(new) return out or None @property def confcode(self) -> Optional[int]: """Code of the conference the document belongs to.""" return make_int_if_possible(self._confevent.get('confcode')) @property def confdate(self) -> Optional[Tuple[Tuple[int, int], Tuple[int, int]]]: """Date range of the conference the document belongs to represented by two tuples in the form (YYYY, MM, DD). """ dates = self._confevent.get('confdate', {}) try: keys = ("startdate", "enddate") date_order = ("@year", "@month", "@day") d = (tuple(int(dates[k1][k2]) for k2 in date_order) for k1 in keys) return tuple(d) except KeyError: return None @property def conflocation(self) -> Optional[str]: """Location of the conference the document belongs to.""" return chained_get(self._confevent, ['conflocation', 'city-group']) @property def confname(self) -> Optional[str]: """Name of the conference the document belongs to.""" return self._confevent.get('confname') @property def confsponsor(self) -> Optional[Union[List[str], str]]: """Sponsor(s) of the conference the document belongs to.""" path = ['confsponsors', 'confsponsor'] sponsors = chained_get(self._confevent, path, []) if len(sponsors) == 0: return None if isinstance(sponsors, list): return [s['$'] for s in sponsors] return sponsors @property def contributor_group(self) -> Optional[List[NamedTuple]]: """List of namedtuples representing contributors compiled by Scopus, in the form `(given_name, initials, surname, indexed_name, role)`. """ path = ['source', 'contributor-group'] items = listify(chained_get(self._head, path, [])) out = [] fields = 'given_name initials surname indexed_name role' pers = namedtuple('Contributor', fields) for item in items: entry = item.get('contributor', {}) new = pers(indexed_name=entry.get('ce:indexed-name'), role=entry.get('@role'), surname=entry.get('ce:surname'), given_name=entry.get('ce:given-name'), initials=entry.get('ce:initials')) out.append(new) return out or None @property def copyright(self) -> str: """The copyright statement of the document.""" path = ['item', 'bibrecord', 'item-info', 'copyright', '$'] return chained_get(self._json, path) @property def copyright_type(self) -> str: """The copyright holder of the document.""" path = ['item', 'bibrecord', 'item-info', 'copyright', '@type'] return chained_get(self._json, path) @property def correspondence(self) -> Optional[List[NamedTuple]]: """List of namedtuples representing the authors to whom correspondence should be addressed, in the form ´(surname, initials, organization, country, city_group)´. Multiple organziations are joined on semicolon. """ fields = 'surname initials organization country city_group' auth = namedtuple('Correspondence', fields) items = listify(self._head.get('correspondence', [])) out = [] for item in items: aff = item.get('affiliation', {}) try: org = aff['organization'] try: org = org['$'] except TypeError: # Multiple names given org = "; ".join([d['$'] for d in org]) except KeyError: org = None new = auth(surname=item.get('person', {}).get('ce:surname'), initials=item.get('person', {}).get('ce:initials'), organization=org, country=aff.get('country'), city_group=aff.get('city-group')) out.append(new) return out or None @property def coverDate(self) -> str: """The date of the cover the document is in.""" return chained_get(self._json, ['coredata', 'prism:coverDate']) @property def date_created(self) -> Optional[Tuple[int, int, int]]: """Return the `date_created` of a record. """ path = ["item", "bibrecord", "item-info", "history"] d = chained_get(self._json, path, {}) try: return parse_date_created(d) except KeyError: return None @property def description(self) -> Optional[str]: """Return the description of a record. Note: If this is empty, try `abstract` property instead. """ return chained_get(self._json, ['coredata', 'dc:description']) @property def document_entitlement_status(self) -> Optional[str]: """Returns the document entitlement status, i.e. tells if the requestor is entitled to the requested resource. Note: Only works with `ENTITLED` view. """ return chained_get(self._json, ['document-entitlement', 'status']) @property def doi(self) -> Optional[str]: """DOI of the document.""" return chained_get(self._json, ['coredata', 'prism:doi']) @property def eid(self) -> str: """EID of the document.""" return chained_get(self._json, ['coredata', 'eid']) @property def endingPage(self) -> Optional[str]: """Ending page. If this is empty, try `pageRange` property instead.""" # Try coredata first, fall back to head afterwards ending = chained_get(self._json, ['coredata', 'prism:endingPage']) if not ending: path = ['source', 'volisspag', 'pagerange', '@last'] ending = chained_get(self._head, path) return ending @property def funding(self) -> Optional[List[NamedTuple]]: """List of namedtuples parsed funding information in the form `(agency, agency_id, string, funding_id, acronym, country)`. """ def _get_funding_id(f_dict: dict) -> list: funding_get = f_dict.get('xocs:funding-id', []) try: return [v['$'] for v in funding_get] or None # multiple or empty except TypeError: return [funding_get] # single path = ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding'] funds = listify(chained_get(self._json, path, [])) out = [] fields = 'agency agency_id string funding_id acronym country' fund = namedtuple('Funding', fields) for item in funds: new = fund(agency=item.get('xocs:funding-agency'), agency_id=item.get('xocs:funding-agency-id'), string=item.get('xocs:funding-agency-matched-string'), funding_id=_get_funding_id(item), acronym=item.get('xocs:funding-agency-acronym'), country=item.get('xocs:funding-agency-country')) out.append(new) return out or None @property def funding_text(self) -> Optional[str]: """The raw text from which Scopus derives funding information.""" path = ['item', 'xocs:meta', 'xocs:funding-list', 'xocs:funding-text'] return chained_get(self._json, path) @property def isbn(self) -> Optional[Tuple[str, ...]]: """ISBNs `Optional[str]` to publicationName as tuple of variying length, (e.g. ISBN-10 or ISBN-13).""" isbns = listify(chained_get(self._head, ['source', 'isbn'], [])) if len(isbns) == 0: return None else: return tuple((i['$'] for i in isbns)) @property def issn(self) -> Optional[NamedTuple]: """Namedtuple in the form `(print electronic)`. Note: If the source has an E-ISSN, the META view will return None. Use FULL view instead. """ container = defaultdict(lambda: None) # Parse information from head (from FULL view) info = listify(chained_get(self._head, ['source', 'issn'], [])) for t in info: try: container[t["@type"]] = t["$"] except TypeError: container["print"] = t # Parse information from coredata as fallback fallback = chained_get(self._json, ['coredata', 'prism:issn']) if fallback and len(container) < 2: parts = fallback.split() if len(parts) == 2: if len(container) == 1: for n, o in (("electronic", "print"), ("print", "electronic")): if n not in container: container[n] = [p for p in parts if p != container[o]] else: # no way to find out which is which pass else: container["print"] = parts[0] # Finalize issns = namedtuple('ISSN', 'print electronic', defaults=(None, None)) if not container: return None else: return issns(**container) @property def identifier(self) -> int: """ID of the document (same as EID without "2-s2.0-").""" return get_id(self._json) @property def idxterms(self) -> Optional[List[str]]: """List of index terms (these are just one category of those Scopus provides in the web version) .""" try: terms = listify(self._json.get("idxterms", {}).get('mainterm', [])) except AttributeError: # idxterms is empty return None try: return [d['$'] for d in terms] or None except AttributeError: return None @property def issueIdentifier(self) -> Optional[str]: """Number of the issue the document was published in.""" return chained_get(self._json, ['coredata', 'prism:issueIdentifier']) @property def issuetitle(self) -> Optional[str]: """Title of the issue the document was published in.""" return chained_get(self._head, ['source', 'issuetitle']) @property def language(self) -> Optional[str]: """Language of the article.""" return chained_get(self._json, ['language', '@xml:lang']) @property def openaccess(self) -> Optional[int]: """The openaccess status encoded in single digits.""" path = ['coredata', 'openaccess'] return make_int_if_possible(chained_get(self._json, path)) @property def openaccessFlag(self) -> Optional[bool]: """Whether the document is available via open access or not.""" flag = chained_get(self._json, ['coredata', 'openaccessFlag']) if flag: flag = flag == "true" return flag @property def pageRange(self) -> Optional[str]: """Page range. If this is empty, try `startingPage` and `endingPage` properties instead. """ # Try data from coredata first, fall back to head afterwards pages = chained_get(self._json, ['coredata', 'prism:pageRange']) if not pages: return chained_get(self._head, ['source', 'volisspag', 'pages']) return pages @property def pii(self) -> Optional[str]: """The PII (Publisher Item Identifier) of the document.""" return chained_get(self._json, ['coredata', 'pii']) @property def publicationName(self) -> Optional[str]: """Name of source the document is published in.""" return chained_get(self._json, ['coredata', 'prism:publicationName']) @property def publisher(self) -> Optional[str]: """Name of the publisher of the document. Note: Information provided in the FULL view of the article might be more complete. """ # Return information from FULL view, fall back to other views full = chained_get(self._head, ['source', 'publisher', 'publishername']) if full is None: return chained_get(self._json, ['coredata', 'dc:publisher']) else: return full @property def publisheraddress(self) -> Optional[str]: """Name of the publisher of the document.""" return chained_get(self._head, ['source', 'publisher', 'publisheraddress']) @property def pubmed_id(self) -> Optional[int]: """The PubMed ID of the document.""" path = ['coredata', 'pubmed-id'] return make_int_if_possible(chained_get(self._json, path)) @property def refcount(self) -> Optional[int]: """Number of references of an article. Note: Requires either the FULL view or REF view. """ try: # REF view return int(self._ref['@total-references']) except KeyError: # FULL view try: return int(self._ref['@refcount']) except KeyError: return None @property def references(self) -> Optional[List[NamedTuple]]: """List of namedtuples representing references listed in the document, in the form `(position, id, doi, title, authors, authors_auid, authors_affiliationid, sourcetitle, publicationyear, coverDate, volume, issue, first, last, citedbycount, type, text, fulltext)`. `position` is the number at which the reference appears in the document, `id` is the Scopus ID of the referenced document (EID without the "2-s2.0-"), `authors` is a string of the names of the authors in the format "Surname1, Initials1; Surname2, Initials2", `authors_auid` is a string of the author IDs joined on "; ", `authors_affiliationid` is a string of the authors' affiliation IDs joined on "; ", `sourcetitle` is the name of the source (e.g. the journal), `publicationyear` is the year of the publication as string (FULL view only), `coverDate` is the date of the publication as string (REF view only), `volume` and `issue`, are strings referring to the volume and issue, `first` and `last` refer to the page range, `citedbycount` the total number of citations of the cited item (REF view only), `type` describes the parsing status of the reference (resolved or not), `text` is information on the publication, `fulltext` is the text the authors used for the reference. Note: Requires either the FULL view or REF view. Might be empty even if refcount is positive. Specific fields can be empty. The lists `authors` and `authors_auid` may contain duplicates because of the 1:1 pairing with the list `authors_affiliationid`. """ out = [] fields = 'position id doi title authors authors_auid '\ 'authors_affiliationid sourcetitle publicationyear coverDate '\ 'volume issue first last citedbycount type text fulltext' ref = namedtuple('Reference', fields) items = listify(self._ref.get("reference", [])) for item in items: try: info = item.get('ref-info', item) except AttributeError: # item not a dictionary continue volisspag = info.get('volisspag', {}) or {} if isinstance(volisspag, list): volisspag = volisspag[0] volis = volisspag.get("voliss", {}) if isinstance(volis, list): volis = volis[0] # Parse author information if self._view == 'FULL': # FULL view parsing auth = listify(info.get('ref-authors', {}).get('author', [])) authors = [', '.join(filter(None, [d.get('ce:surname'), d.get('ce:initials')])) for d in auth] auids = None affids = None ids = listify(info['refd-itemidlist']['itemid']) doi = _select_by_idtype(ids, id_type='DOI') scopus_id = _select_by_idtype(ids, id_type='SGR') else: # REF view parsing auth = (info.get('author-list') or {}).get('author', []) auth = deduplicate(auth) authors = [', '.join(filter(None, [d.get('ce:surname'), d.get('ce:given-name')])) for d in auth] auids = "; ".join(filter(None, [d.get('@auid') for d in auth])) affs = filter(None, [d.get('affiliation') for d in auth]) affids = "; ".join([aff.get('@id') for aff in affs]) doi = info.get('ce:doi') scopus_id = info.get('scopus-id') # Combine information new = ref(position=item.get('@id'), id=scopus_id, doi=doi, authors="; ".join(authors), authors_auid=auids or None, authors_affiliationid=affids or None, title=info.get('ref-title', {}).get('ref-titletext', info.get('title')), sourcetitle=info.get('ref-sourcetitle', info.get('sourcetitle')), publicationyear=info.get('ref-publicationyear', {}).get('@first'), coverDate=info.get('prism:coverDate'), volume=volis.get('@volume'), issue=volis.get('@issue'), first=volisspag.get('pagerange', {}).get('@first'), last=volisspag.get('pagerange', {}).get('@last'), citedbycount=info.get('citedby-count'), type=info.get('type'), text=info.get('ref-text'), fulltext=item.get('ref-fulltext')) out.append(new) return out or None @property def scopus_link(self) -> str: """URL to the document page on Scopus.""" return get_link(self._json, 1) @property def self_link(self) -> str: """URL to Scopus API page of this document.""" return get_link(self._json, 0) @property def sequencebank(self) -> Optional[List[NamedTuple]]: """List of namedtuples representing biological entities defined or mentioned in the text, in the form `(name, sequence_number, type)`. """ path = ['enhancement', 'sequencebanks', 'sequencebank'] items = listify(chained_get(self._head, path, [])) bank = namedtuple('Sequencebank', 'name sequence_number type') out = [] for item in items: numbers = listify(item['sequence-number']) for number in numbers: new = bank(name=item['@name'], sequence_number=number['$'], type=number['@type']) out.append(new) return out or None @property def source_id(self) -> Optional[int]: """Scopus source ID of the document.""" path = ['coredata', 'source-id'] return make_int_if_possible(chained_get(self._json, path)) @property def sourcetitle_abbreviation(self) -> Optional[str]: """Abbreviation of the source the document is published in. Note: Requires the FULL view of the article. """ return self._head.get('source', {}).get('sourcetitle-abbrev') @property def srctype(self) -> Optional[str]: """Aggregation type of source the document is published in (short version of aggregationType). """ return chained_get(self._json, ['coredata', 'srctype']) @property def startingPage(self) -> Optional[str]: """Starting page. If this is empty, try `pageRange` property instead.""" # Try coredata first, fall back to bibrecord afterwards starting = chained_get(self._json, ['coredata', 'prism:startingPage']) if not starting: path = ['source', 'volisspag', 'pagerange', '@first'] starting = chained_get(self._head, path) return starting @property def subject_areas(self) -> Optional[List[NamedTuple]]: """List of namedtuples containing subject areas of the article in the form `(area abbreviation code)`. Note: Requires the FULL view of the article. """ area = namedtuple('Area', 'area abbreviation code') path = ['subject-areas', 'subject-area'] out = [area(area=item['$'], abbreviation=item['@abbrev'], code=int(item['@code'])) for item in listify(chained_get(self._json, path, []))] return out or None @property def subtype(self) -> str: """Type of the document. Refer to the Scopus Content Coverage Guide for a list of possible values. Short version of subtypedescription. """ return chained_get(self._json, ['coredata', 'subtype']) or None @property def subtypedescription(self) -> str: """Type of the document. Refer to the Scopus Content Coverage Guide for a list of possible values. Long version of subtype. """ return chained_get(self._json, ['coredata', 'subtypeDescription']) or None @property def title(self) -> Optional[str]: """Title of the document.""" return chained_get(self._json, ['coredata', 'dc:title']) @property def url(self) -> Optional[str]: """URL to the API view of the document.""" return chained_get(self._json, ['coredata', 'prism:url']) @property def volume(self) -> Optional[str]: """Volume for the document.""" return chained_get(self._json, ['coredata', 'prism:volume']) @property def website(self) -> str: """Website of publisher.""" path = ['source', 'website', 'ce:e-address', '$'] return chained_get(self._head, path) def __init__(self, identifier: Union[int, str] = None, refresh: Union[bool, int] = False, view: str = 'META_ABS', id_type: str = None, **kwds: str ) -> None: """Interaction with the Abstract Retrieval API. :param identifier: The identifier of a document. Can be the Scopus EID , the Scopus ID, the PII, the Pubmed-ID or the DOI. :param refresh: Whether to refresh the cached file if it exists or not. If int is passed, cached file will be refreshed if the number of days since last modification exceeds that value. :param id_type: The type of used ID. Allowed values: None, 'eid', 'pii', 'scopus_id', 'pubmed_id', 'doi'. If the value is None, the function tries to infer the ID type itself. :param view: The view of the file that should be downloaded. Allowed values: META, META_ABS, REF, FULL, ENTITLED, where FULL includes all information of META_ABS view and META_ABS includes all information of the META view. For details see https://dev.elsevier.com/sc_abstract_retrieval_views.html. Note: `ENTITLED` view only contains the `document_entitlement_status`. :param kwds: Keywords passed on as query parameters. Must contain fields and values listed in the API specification at https://dev.elsevier.com/documentation/AbstractRetrievalAPI.wadl. Raises ------ ValueError If any of the parameters `id_type`, `refresh` or `view` is not one of the allowed values. Notes ----- The directory for cached results is `{path}/{view}/{identifier}`, where `path` is specified in your configuration file. In case `identifier` is a DOI, an underscore replaces the forward slash. """ # Checks identifier = str(identifier) check_parameter_value(view, VIEWS['AbstractRetrieval'], "view") if id_type is None: id_type = detect_id_type(identifier) else: allowed_id_types = ('eid', 'pii', 'scopus_id', 'pubmed_id', 'doi') check_parameter_value(id_type, allowed_id_types, "id_type") # Load json self._view = view self._refresh = refresh Retrieval.__init__(self, identifier=identifier, id_type=id_type, api='AbstractRetrieval', **kwds) if self._view in ('META', 'META_ABS', 'REF', 'FULL'): self._json = self._json['abstracts-retrieval-response'] self._head = chained_get(self._json, ["item", "bibrecord", "head"], {}) conf_path = ['source', 'additional-srcinfo', 'conferenceinfo', 'confevent'] self._confevent = chained_get(self._head, conf_path, {}) if self._view == "REF": ref_path = ["references"] else: ref_path = ['item', 'bibrecord', 'tail', 'bibliography'] self._ref = chained_get(self._json, ref_path, {}) def __str__(self): """Return pretty text version of the document. Assumes the document is a journal article and was loaded with view="META_ABS" or view="FULL". """ def convert_citedbycount(entry): try: return float(entry.citedbycount) or 0 except (ValueError, TypeError): return 0 def get_date(coverDate): try: return coverDate[:4] except TypeError: return None if self._view in ('FULL', 'META_ABS', 'META'): date = self.get_cache_file_mdate().split()[0] # Authors if self.authors: if len(self.authors) > 1: authors = _list_authors(self.authors) else: a = self.authors[0] authors = str(a.given_name) + ' ' + str(a.surname) else: authors = "(No author found)" # All other information s = f'{authors}: "{self.title}", {self.publicationName}, {self.volume}' if self.issueIdentifier: s += f'({self.issueIdentifier})' s += ', ' s += _parse_pages(self) s += f'({self.coverDate[:4]}).' if self.doi: s += f' https://doi.org/{self.doi}.\n' s += f'{self.citedby_count} citation(s) as of {date}' if self.affiliation: s += "\n Affiliation(s):\n " s += '\n '.join([aff.name for aff in self.affiliation]) elif self._view in ('REF'): try: # Sort reference list by citationcount top_n = 5 references = sorted(self.references, key=convert_citedbycount, reverse=True) top_references = [f'{reference.title} ({get_date(reference.coverDate)}). '+ f'EID: {reference.id}' for reference in references[:top_n]] except TypeError: top_n = 0 s = f'A total of {self.refcount or 0} references were found. ' if top_n: s += f'Top {top_n} references:\n\t' s += '\n\t'.join(top_references) return s
[docs] def get_bibtex(self) -> str: """Bibliographic entry in BibTeX format. Raises ------ ValueError If the item's aggregationType is not Journal. """ if self.aggregationType != 'Journal': raise ValueError('Only Journal articles supported.') # Item key year = self.coverDate[0:4] first = self.title.split()[0].title() last = self.title.split()[-1].title() key = ''.join([self.authors[0].surname, year, first, last]) # Authors authors = ' and '.join([f"{a.given_name} {a.surname}" for a in self.authors]) # Pages if self.pageRange: pages = self.pageRange elif self.startingPage: pages = f'{self.startingPage}-{self.endingPage}' else: pages = '-' # All information bib = f"@article{{{key},\n author = {{{authors}}},\n title = "\ f"{{{{{self.title}}}}},\n journal = {{{self.publicationName}}},"\ f"\n year = {{{year}}},\n volume = {{{self.volume}}},\n "\ f"number = {{{self.issueIdentifier}}},\n pages = {{{pages}}}" # DOI if self.doi: bib += f",\n doi = {{{self.doi}}}" bib += "}" return bib
[docs] def get_html(self) -> str: """Bibliographic entry in html format.""" # Author links au_link = ('<a href="https://www.scopus.com/authid/detail.url' '?origin=AuthorProfile&authorId={0}">{1}</a>') if len(self.authors) > 1: authors = u', '.join([au_link.format(a.auid, a.given_name + ' ' + a.surname) for a in self.authors[0:-1]]) authors += (u' and ' + au_link.format(self.authors[-1].auid, (str(self.authors[-1].given_name) + ' ' + str(self.authors[-1].surname)))) else: a = self.authors[0] authors = au_link.format(a.auid, a.given_name + ' ' + a.surname) title = f'<a href="{self.scopus_link}">{self.title}</a>' if self.volume and self.issueIdentifier: volissue = f'<b>{self.volume}({self.issueIdentifier})</b>' elif self.volume: volissue = f'<b>{self.volume}</b>' else: volissue = 'no volume' jlink = '<a href="https://www.scopus.com/source/sourceInfo.url'\ f'?sourceId={self.source_id}">{self.publicationName}</a>' s = f"{authors}, {title}, {jlink}, {volissue}, " +\ f"{_parse_pages(self, unicode=True)}, ({self.coverDate[:4]})." if self.doi: s += f' <a href="https://doi.org/{self.doi}">doi:{self.doi}</a>.' return s
[docs] def get_latex(self) -> str: """Bibliographic entry in LaTeX format.""" if len(self.authors) > 1: authors = _list_authors(self.authors) else: a = self.authors authors = ' '.join([a.given_name, a.surname]) if self.volume and self.issueIdentifier: volissue = f'\\textbf{{{self.volume}({self.issueIdentifier})}}' elif self.volume: volissue = f'\\textbf{{{self.volume}}}' else: volissue = 'no volume' s = f'{authors}, \\textit{{{self.title}}}, {self.publicationName}, ' +\ f'{volissue}, {_parse_pages(self)} ({self.coverDate[:4]}).' if self.doi: s += f' \\href{{https://doi.org/{self.doi}}}{{doi:{self.doi}}}, ' s += f'\\href{{{self.scopus_link}}}{{scopus:{self.eid}}}.' return s
[docs] def get_ris(self) -> str: """Bibliographic entry in RIS (Research Information System Format) format for journal articles. Raises ------ ValueError If the item's aggregationType is not Journal. """ if self.aggregationType != 'Journal': raise ValueError('Only Journal articles supported.') # Basic information ris = f"TY - JOUR\nTI - {self.title}\nJO - {self.publicationName}"\ f"\nVL - {self.volume}\nDA - {self.coverDate}\n"\ f"PY - {self.coverDate[0:4]}\nSP - {self.pageRange}\n" # Authors for au in self.authors: ris += f'AU - {au.indexed_name}\n' # DOI if self.doi: ris += f'DO - {self.doi}\nUR - https://doi.org/{self.doi}\n' # Issue if self.issueIdentifier: ris += f'IS - {self.issueIdentifier}\n' ris += 'ER - \n\n' return ris
def _get_org(aff): """Auxiliary function to extract org information from affiliation for authorgroup. """ try: org = aff['organization'] if not isinstance(org, str): try: org = org['$'] except TypeError: # Multiple names given org = ', '.join([d['$'] for d in org if d]) except KeyError: # Author group w/o affiliation org = None return org def _list_authors(lst): """Format a list of authors (Surname, Firstname and Firstname Surname).""" authors = ', '.join([' '.join([a.given_name, a.surname]) for a in lst[0:-1]]) authors += ' and ' + ' '.join([lst[-1].given_name, lst[-1].surname]) return authors def _parse_pages(self, unicode=False): """Auxiliary function to parse and format page range of a document.""" if self.pageRange: pages = f'pp. {self.pageRange}' elif self.startingPage: pages = f'pp. {self.startingPage}-{self.endingPage}' else: pages = '(no pages found)' if unicode: pages = f'{pages}' return pages def _select_by_idtype(lst, id_type): """Auxiliary function to return items matching a special idtype.""" try: return [d['$'] for d in lst if d['@idtype'] == id_type][0] except IndexError: return None