Source code for pybliometrics.scival.topic_lookup_metrics

from typing import NamedTuple

from pybliometrics.superclasses import Retrieval
from pybliometrics.utils import make_int_if_possible
from pybliometrics.utils.constants import SCIVAL_METRICS
from pybliometrics.utils.parse_metrics import extract_metric_data, extract_metric_lists, MetricData


class Topic(NamedTuple):
    """Named tuple representing a topic.

    Instances are built by `TopicLookupMetrics.topics`, one per item of the
    response's `results` list, from that item's `topic` entry.  Every field
    is `None` when the corresponding key is missing.
    """
    # Topic 'id', passed through make_int_if_possible
    id: int | None
    # Topic 'name'
    name: str | None
    # Topic 'uri'
    uri: str | None
    # Topic 'prominencePercentile', as delivered in the response
    prominencePercentile: int | float | None
    # Topic 'scholarlyOutput', as delivered in the response
    scholarlyOutput: int | float | None


class CorePaper(NamedTuple):
    """Named tuple representing a core paper.

    Instances are built by `TopicLookupMetrics.CorePapers`, one per entry in
    the `values` list of each "CorePapers" metric item.
    """
    # 'entity_id' of the metric item (presumably the topic ID - confirm
    # against extract_metric_lists)
    entity_id: int | None
    # 'entity_name' of the metric item
    entity_name: str | None
    # 'id' of the paper entry, stored without conversion
    publication_id: int | None


class RecentPaper(NamedTuple):
    """Named tuple representing a recently published paper.

    Instances are built by `TopicLookupMetrics.MostRecentlyPublishedPapers`,
    one per entry in the `values` list of each
    "MostRecentlyPublishedPapers" metric item.
    """
    # 'entity_id' of the metric item (presumably the topic ID - confirm
    # against extract_metric_lists)
    entity_id: int | None
    # 'entity_name' of the metric item
    entity_name: str | None
    # 'id' of the paper entry, stored without conversion
    publication_id: int | None


class RelatedTopic(NamedTuple):
    """Named tuple representing a related topic.

    Instances are built by `TopicLookupMetrics.RelatedTopics`, one per entry
    in the `values` list of each "RelatedTopics" metric item.  The
    `related_topic_*` fields come from the entry's nested `topic` dict, the
    remaining metric fields from the entry itself.
    """
    # 'entity_id' of the metric item (presumably the topic that was looked
    # up - confirm against extract_metric_lists)
    entity_id: int | None
    # 'entity_name' of the metric item
    entity_name: str | None
    # 'id' of the nested 'topic' dict, stored without conversion
    related_topic_id: int | None
    # 'name' of the nested 'topic' dict
    related_topic_name: str | None
    # 'uri' of the nested 'topic' dict
    related_topic_uri: str | None
    # 'prominencePercentile' of the entry
    prominencePercentile: int | float | None
    # 'relationScore' of the entry
    relationScore: int | float | None
    # Read from the all-lowercase key 'relatedtopicrank' of the entry
    # NOTE(review): the other keys are camelCase - confirm the key spelling
    relatedTopicRank: int | None


class TopAuthor(NamedTuple):
    """Named tuple representing a top author.

    Instances are built by `TopicLookupMetrics.TopAuthors`, one per entry in
    the `values` list of each "TopAuthors" metric item.
    """
    # 'entity_id' of the metric item (presumably the topic ID - confirm
    # against extract_metric_lists)
    entity_id: int | None
    # 'entity_name' of the metric item
    entity_name: str | None
    # 'id' of the author entry, stored without conversion
    author_id: int | None
    # 'name' of the author entry
    author_name: str | None
    # 'publicationCount' of the author entry
    publicationCount: int | float | None


class TopPublication(NamedTuple):
    """Named tuple representing a top cited publication.

    Instances are built by `TopicLookupMetrics.TopCitedPublications`, one per
    entry in the `values` list of each "TopCitedPublications" metric item.
    """
    # 'entity_id' of the metric item (presumably the topic ID - confirm
    # against extract_metric_lists)
    entity_id: int | None
    # 'entity_name' of the metric item
    entity_name: str | None
    # 'id' of the publication entry, stored without conversion
    publication_id: int | None
    # 'citationCount' of the publication entry
    citationCount: int | float | None


class TopInstitution(NamedTuple):
    """Named tuple representing a top institution.

    Instances are built by `TopicLookupMetrics.TopInstitutions`, one per
    entry in the `values` list of each "TopInstitutions" metric item.
    """
    # 'entity_id' of the metric item (presumably the topic ID - confirm
    # against extract_metric_lists)
    entity_id: int | None
    # 'entity_name' of the metric item
    entity_name: str | None
    # 'id' of the institution entry, stored without conversion
    institution_id: int | None
    # 'name' of the institution entry
    institution_name: str | None
    # 'publicationCount' of the institution entry
    publicationCount: int | float | None


class TopJournal(NamedTuple):
    """Named tuple representing a top journal.

    Instances are built by `TopicLookupMetrics.TopJournals`, one per entry in
    the `values` list of each "TopJournals" metric item.  Apart from
    `journal_id` ('id') and `journal_name` ('name'), every journal field is
    read from the entry key of the same name.
    """
    # 'entity_id' of the metric item (presumably the topic ID - confirm
    # against extract_metric_lists)
    entity_id: int | None
    # 'entity_name' of the metric item
    entity_name: str | None
    # 'id' of the journal entry, stored without conversion
    journal_id: int | None
    # 'name' of the journal entry
    journal_name: str | None
    publicationCount: int | float | None
    citationCount: int | float | None
    authorCount: int | float | None
    publicationGrowth: float | None
    authorGrowth: float | None
    sjr: float | None
    snip: float | None
    citeScore: float | None


class TopKeyword(NamedTuple):
    """Named tuple representing a top keyword.

    Instances are built by `TopicLookupMetrics.TopKeywords`, one per entry in
    the `values` list of each "TopKeywords" metric item.
    """
    # 'entity_id' of the metric item (presumably the topic ID - confirm
    # against extract_metric_lists)
    entity_id: int | None
    # 'entity_name' of the metric item
    entity_name: str | None
    # 'name' of the keyword entry
    keyword_name: str | None
    # 'uri' of the keyword entry
    keyword_uri: str | None
    # Remaining fields are read from the entry key of the same name
    weight: int | float | None
    relevance: int | float | None
    publicationCount: int | float | None
    publicationGrowth: float | None


class TopicLookupMetrics(Retrieval):
    @property
    def topics(self) -> list[Topic]:
        """A list of namedtuples representing topics and their basic info
        in the form `(id, name, uri, prominencePercentile, scholarlyOutput)`.
        """
        topic_dicts = (result.get('topic', {})
                       for result in self._json.get('results', []))
        return [
            Topic(
                id=make_int_if_possible(topic_data.get('id')),
                name=topic_data.get('name'),
                uri=topic_data.get('uri'),
                prominencePercentile=topic_data.get('prominencePercentile'),
                scholarlyOutput=topic_data.get('scholarlyOutput')
            )
            for topic_data in topic_dicts
        ]

    @property
    def AuthorCount(self) -> list[MetricData] | None:
        """Author count metrics for each topic.
        Returns list of MetricData namedtuples with structure:
        (entity_id, entity_name, metric, year, value, percentage, threshold).
        """
        return extract_metric_data(self._json, 'AuthorCount', self._by_year, "topic")

    @property
    def CitationCount(self) -> list[MetricData] | None:
        """Citation count metrics for each topic.
        Returns list of MetricData namedtuples with structure:
        (entity_id, entity_name, metric, year, value, percentage, threshold).
        """
        return extract_metric_data(self._json, 'CitationCount', self._by_year, "topic")

    @property
    def CorePapers(self) -> list[CorePaper] | None:
        """Core papers for the topic.
        Returns list of CorePaper namedtuples with structure:
        (entity_id, entity_name, publication_id).
        """
        out = [
            CorePaper(
                entity_id=entity_id,
                entity_name=entity_name,
                publication_id=paper.get('id')
            )
            for entity_id, entity_name, paper
            in self._list_metric_entries("CorePapers")
        ]
        return out or None

    @property
    def FieldWeightedCitationImpact(self) -> list[MetricData] | None:
        """Field weighted citation impact metrics for each topic.
        Returns list of MetricData namedtuples with structure:
        (entity_id, entity_name, metric, year, value, percentage, threshold).
        """
        return extract_metric_data(self._json, 'FieldWeightedCitationImpact',
                                   self._by_year, "topic")

    @property
    def InstitutionCount(self) -> list[MetricData] | None:
        """Institution count metrics for each topic.
        Returns list of MetricData namedtuples with structure:
        (entity_id, entity_name, metric, year, value, percentage, threshold).
        """
        return extract_metric_data(self._json, 'InstitutionCount', self._by_year, "topic")

    @property
    def MostRecentlyPublishedPapers(self) -> list[RecentPaper] | None:
        """Most recently published papers for the topic.
        Returns list of RecentPaper namedtuples with structure:
        (entity_id, entity_name, publication_id).
        """
        out = [
            RecentPaper(
                entity_id=entity_id,
                entity_name=entity_name,
                publication_id=paper.get('id')
            )
            for entity_id, entity_name, paper
            in self._list_metric_entries("MostRecentlyPublishedPapers")
        ]
        return out or None

    @property
    def RelatedTopics(self) -> list[RelatedTopic] | None:
        """Related topics for the topic.
        Returns list of RelatedTopic namedtuples with structure:
        (entity_id, entity_name, related_topic_id, related_topic_name,
        related_topic_uri, prominencePercentile, relationScore,
        relatedTopicRank).
        """
        out = []
        for entity_id, entity_name, entry in self._list_metric_entries("RelatedTopics"):
            # Identifying info sits in a nested dict, the scores on the entry
            topic_data = entry.get('topic', {})
            out.append(RelatedTopic(
                entity_id=entity_id,
                entity_name=entity_name,
                related_topic_id=topic_data.get('id'),
                related_topic_name=topic_data.get('name'),
                related_topic_uri=topic_data.get('uri'),
                prominencePercentile=entry.get('prominencePercentile'),
                relationScore=entry.get('relationScore'),
                # NOTE(review): all-lowercase key, unlike its camelCase
                # siblings - confirm against an actual API response
                relatedTopicRank=entry.get('relatedtopicrank')
            ))
        return out or None

    @property
    def ScholarlyOutput(self) -> list[MetricData] | None:
        """Scholarly output metrics for each topic.
        Returns list of MetricData namedtuples with structure:
        (entity_id, entity_name, metric, year, value, percentage, threshold).
        """
        return extract_metric_data(self._json, 'ScholarlyOutput', self._by_year, "topic")

    @property
    def TopAuthors(self) -> list[TopAuthor] | None:
        """Top authors for the topic.
        Returns list of TopAuthor namedtuples with structure:
        (entity_id, entity_name, author_id, author_name, publicationCount).
        """
        out = [
            TopAuthor(
                entity_id=entity_id,
                entity_name=entity_name,
                author_id=author.get('id'),
                author_name=author.get('name'),
                publicationCount=author.get('publicationCount')
            )
            for entity_id, entity_name, author
            in self._list_metric_entries("TopAuthors")
        ]
        return out or None

    @property
    def TopCitedPublications(self) -> list[TopPublication] | None:
        """Top cited publications for the topic.
        Returns list of TopPublication namedtuples with structure:
        (entity_id, entity_name, publication_id, citationCount).
        """
        out = [
            TopPublication(
                entity_id=entity_id,
                entity_name=entity_name,
                publication_id=pub.get('id'),
                citationCount=pub.get('citationCount')
            )
            for entity_id, entity_name, pub
            in self._list_metric_entries("TopCitedPublications")
        ]
        return out or None

    @property
    def TopInstitutions(self) -> list[TopInstitution] | None:
        """Top institutions for the topic.
        Returns list of TopInstitution namedtuples with structure:
        (entity_id, entity_name, institution_id, institution_name,
        publicationCount).
        """
        out = [
            TopInstitution(
                entity_id=entity_id,
                entity_name=entity_name,
                institution_id=institution.get('id'),
                institution_name=institution.get('name'),
                publicationCount=institution.get('publicationCount')
            )
            for entity_id, entity_name, institution
            in self._list_metric_entries("TopInstitutions")
        ]
        return out or None

    @property
    def TopJournals(self) -> list[TopJournal] | None:
        """Top journals for the topic.
        Returns list of TopJournal namedtuples with structure:
        (entity_id, entity_name, journal_id, journal_name, publicationCount,
        citationCount, authorCount, publicationGrowth, authorGrowth, sjr,
        snip, citeScore).
        """
        out = [
            TopJournal(
                entity_id=entity_id,
                entity_name=entity_name,
                journal_id=journal.get('id'),
                journal_name=journal.get('name'),
                publicationCount=journal.get('publicationCount'),
                citationCount=journal.get('citationCount'),
                authorCount=journal.get('authorCount'),
                publicationGrowth=journal.get('publicationGrowth'),
                authorGrowth=journal.get('authorGrowth'),
                sjr=journal.get('sjr'),
                snip=journal.get('snip'),
                citeScore=journal.get('citeScore')
            )
            for entity_id, entity_name, journal
            in self._list_metric_entries("TopJournals")
        ]
        return out or None

    @property
    def TopKeywords(self) -> list[TopKeyword] | None:
        """Top keywords for the topic.
        Returns list of TopKeyword namedtuples with structure:
        (entity_id, entity_name, keyword_name, keyword_uri, weight,
        relevance, publicationCount, publicationGrowth).
        """
        out = [
            TopKeyword(
                entity_id=entity_id,
                entity_name=entity_name,
                keyword_name=keyword.get('name'),
                keyword_uri=keyword.get('uri'),
                weight=keyword.get('weight'),
                relevance=keyword.get('relevance'),
                publicationCount=keyword.get('publicationCount'),
                publicationGrowth=keyword.get('publicationGrowth')
            )
            for entity_id, entity_name, keyword
            in self._list_metric_entries("TopKeywords")
        ]
        return out or None

    def __init__(self,
                 topic_ids: str | int | list,
                 metric_types: str | list | None = None,
                 by_year: bool = False,
                 refresh: bool | int = False,
                 **kwds: str
                 ) -> None:
        """Interaction with the SciVal's `metrics` endpoint of the
        `TopicLookup API`.

        :param topic_ids: SciVal Topic ID(s). Can be a single ID (string or
                          integer), a comma-separated string of IDs, or a
                          list or tuple of IDs (e.g. `[1516, 1517]`).
        :param metric_types: Metric type(s) to retrieve. Can be a single
                             metric or comma-separated string, or a list or
                             tuple. Available metrics are: AuthorCount,
                             CitationCount, CorePapers,
                             FieldWeightedCitationImpact, InstitutionCount,
                             MostRecentlyPublishedPapers, RelatedTopics,
                             ScholarlyOutput, TopAuthors,
                             TopCitedPublications, TopInstitutions,
                             TopJournals, TopKeywords.
                             If not provided, all metrics are retrieved
                             (only the by-year ones when `by_year` is set).
        :param by_year: Whether to retrieve metrics broken down by year.
                        Note: Some metrics are not available by year:
                        CorePapers, MostRecentlyPublishedPapers,
                        RelatedTopics, TopAuthors, TopInstitutions,
                        TopJournals, TopKeywords.
        :param refresh: Whether to refresh the cached file if it exists or
                        not. If int is passed, cached file will be refreshed
                        if the number of days since last modification
                        exceeds that value.
        :param kwds: Keywords passed on as query parameters. Must contain
                     fields and values mentioned in the
                     https://dev.elsevier.com/documentation/SciValTopicAPI.wadl.
        """
        self._view = ''
        self._refresh = refresh
        self._by_year = by_year

        # Normalise topic_ids to a comma-separated string.  str() lets a
        # bare integer ID through instead of failing on `.replace`.
        if isinstance(topic_ids, (list, tuple)):
            topic_ids = ",".join(str(t) for t in topic_ids)
        else:
            topic_ids = str(topic_ids).replace(" ", "")

        # Without an explicit selection, request every metric that is
        # compatible with the chosen by_year mode
        if metric_types is None:
            available = SCIVAL_METRICS["TopicLookupMetrics"]
            if by_year:
                metric_types = available["byYear"]
            else:
                metric_types = available["byYear"] + available["notByYear"]
        if isinstance(metric_types, (list, tuple)):
            metric_types = ",".join(metric_types)

        # Set up parameters for the API call
        params = {
            'topicIds': topic_ids,
            'metricTypes': metric_types,
            'byYear': str(by_year).lower(),
            **kwds
        }
        Retrieval.__init__(self, **params)

    def __str__(self) -> str:
        """Return pretty text version of the topic metrics."""
        topics = self.topics
        if not topics:
            return "No topics found"
        lines = [f"TopicLookupMetrics for {len(topics)} topic(s):"]
        lines.extend(f"- {topic.name} (ID: {topic.id})" for topic in topics)
        return "\n".join(lines)

    def _list_metric_entries(self, metric_type: str):
        """Yield `(entity_id, entity_name, entry)` for every entry in the
        `values` list of each item that `extract_metric_lists` returns for
        `metric_type`.

        Shared by all list-type metric properties, which only differ in how
        they turn `entry` into a namedtuple.
        """
        for item in extract_metric_lists(self._json, metric_type, "topic"):
            entity_id = item.get('entity_id')
            entity_name = item.get('entity_name')
            for entry in item.get('values', []):
                yield entity_id, entity_name, entry