1+ import asyncio
12import os
2- from typing import List , Literal , Optional
3+ from typing import Any , List , Literal , Optional , Tuple
34
45from semanticscholar import SemanticScholar
5- from tqdm import tqdm
66
77from ..orcid import orcid_to_author_name
8- from ..self_references import ReferenceResult
8+ from ..self_citations import CitationResult
9+ from ..self_references import ReferenceResult , self_references_paper
910from ..utils import author_name_to_ssaid , get_papers_for_author
1011from .core import Entity , EntityResult
1112
@@ -14,7 +15,27 @@ class ResearcherResult(EntityResult):
1415 name : str
1516 ssid : int
1617 orcid : Optional [str ] = None
17- # TODO: the ratios will be averaged across all papers for that author
18+
def _ordered_items(self) -> List[Tuple[str, Any]]:
    """Return ``(field name, value)`` pairs in a fixed display order.

    The order is: name, the two ratios, the two totals, the two per-paper
    mappings, and finally the identifiers (``ssid``, ``orcid``).
    """
    # The display order is deliberate and independent of declaration order.
    display_order = (
        "name",
        "self_reference_ratio",
        "self_citation_ratio",
        "num_references",
        "num_citations",
        "self_references",
        "self_citations",
        "ssid",
        "orcid",
    )
    return [(field, getattr(self, field)) for field in display_order]
32+
def __repr__(self) -> str:
    """Return ``ClassName(key=value, ...)`` using the order of ``_ordered_items``."""
    class_name = self.__class__.__name__
    rendered = [f"{key}={value!r}" for key, value in self._ordered_items()]
    return f"{class_name}({', '.join(rendered)})"
36+
def __str__(self) -> str:
    """Return space-separated ``key=value`` pairs in the order of ``_ordered_items``."""
    rendered = [f"{key}={value!r}" for key, value in self._ordered_items()]
    return " ".join(rendered)
1839
1940
# Accepted values for the `mode` argument of the researcher constructor.
# The members are spelled out literally because static type checkers only
# accept literal values inside `Literal[...]` (PEP 586); an expression such as
# `Literal[tuple(...)]` works at runtime but is rejected by them.
# Keep MODES and ModeType in sync when adding a mode.
MODES = ("name", "orcid", "ssaid", "infer")
ModeType = Literal["name", "orcid", "ssaid", "infer"]
@@ -32,7 +53,7 @@ def __init__(self, input: str, mode: ModeType = "infer"):
3253 Construct researcher object for self citation/reference analysis.
3354
3455 Args:
35- input: A researcher to search for.
56+ input: A researcher to search for, identified by name, ORCID iD, or Semantic Scholar Author ID.
3657 mode: One of `name`, `orcid` (ORCID iD), `ssaid` (Semantic Scholar Author ID) or `infer`.
3758 Defaults to "infer".
3859
@@ -53,32 +74,74 @@ def __init__(self, input: str, mode: ModeType = "infer"):
5374 ):
5475 mode = "orcid"
5576 else :
56- mode = "author"
57-
77+ mode = "name"
5878 if mode == "ssaid" :
59- self .author = sch .get_author (input )
79+ self .name = sch .get_author (input ). _name
6080 self .ssid = input
6181 elif mode == "orcid" :
62- self . author = orcid_to_author_name (input )
82+ orcid_name = orcid_to_author_name (input )
6383 self .orcid = input
64- self .ssid = author_name_to_ssaid (input )
65- elif mode == "author" :
66- self .author = input
67- self .ssid = author_name_to_ssaid (input )
68-
69- # TODO: Skip over erratum / corrigendum
70- self .ssids = get_papers_for_author (self .ssid )
71-
72- def self_references (self ):
84+ self .ssid , self .name = author_name_to_ssaid (orcid_name )
85+ elif mode == "name" :
86+ name = input
87+ self .ssid , self .name = author_name_to_ssaid (input )
88+
async def _self_references_async(
    self, verbose: bool = False
) -> List[ReferenceResult]:
    """Collect the per-paper self-reference results for this researcher.

    Fetches the researcher's papers with ``get_papers_for_author`` (the result
    is stored on ``self.ssids``), passes them to ``self_references_paper`` and
    drops every result that has no references or whose title contains
    "erratum" or "corrigendum" (case-insensitive).

    Args:
        verbose: Forwarded unchanged to ``self_references_paper``.

    Returns:
        The filtered list of ``ReferenceResult`` objects.
    """
    excluded_terms = ("erratum", "corrigendum")

    self.ssids = await get_papers_for_author(self.ssid)
    paper_results: List[ReferenceResult] = await self_references_paper(
        self.ssids, verbose=verbose
    )

    kept: List[ReferenceResult] = []
    for paper in paper_results:
        # Papers without references carry no information for the ratio.
        if not paper.num_references > 0:
            continue
        lowered_title = paper.title.lower()
        # Errata/corrigenda are corrections, not papers in their own right.
        if any(term in lowered_title for term in excluded_terms):
            continue
        kept.append(paper)

    return kept
108+
def self_references(self, verbose: bool = False) -> ResearcherResult:
    """
    Sift through all papers of the researcher and aggregate the self-references.

    The per-paper results are obtained by running ``_self_references_async``
    to completion with ``asyncio.run``. ``asyncio.run`` cannot be used while
    another event loop is running in the same thread, so call this method
    from synchronous code only.

    Args:
        verbose: Forwarded unchanged to ``_self_references_async``.

    Returns:
        A ResearcherResult where ``self_references`` maps each paper title to
        the value stored under ``self.name`` in that paper's
        ``self_references`` (0.0 when absent), sorted in descending order, and
        ``self_reference_ratio`` is the mean of those values over all papers,
        rounded to three decimals. The citation fields are placeholders
        (``num_citations=-1``, ``self_citations={}``,
        ``self_citation_ratio=-1.0``).
    """
    reference_results = asyncio.run(self._self_references_async(verbose=verbose))

    # One score per paper, in the order the results were returned.
    scores = [
        result.self_references.get(self.name, 0.0) for result in reference_results
    ]
    individual_self_references = {
        result.title: score for result, score in zip(reference_results, scores)
    }
    # Average over the papers, not over the title-keyed dict: papers sharing a
    # title collapse into a single dict entry, which would skew the mean while
    # num_references below still counts every paper. max(1, ...) guards the
    # empty case.
    reference_ratio = sum(scores) / max(1, len(scores))

    return ResearcherResult(
        name=self.name,
        ssid=int(self.ssid),
        # NOTE(review): the visible constructor code assigns `orcid` only for
        # ORCID lookups, so fall back to None instead of raising.
        orcid=getattr(self, "orcid", None),
        num_references=sum(r.num_references for r in reference_results),
        num_citations=-1,
        self_references=dict(
            sorted(
                individual_self_references.items(),
                key=lambda item: item[1],
                reverse=True,
            )
        ),
        self_citations={},
        self_reference_ratio=round(reference_ratio, 3),
        self_citation_ratio=-1.0,
    )
143+
144+ def self_citations (self ) -> ResearcherResult :
82145 """
83146 Sifts through all papers of a researcher and finds how often they are self-cited.
84147 """
0 commit comments