diff --git a/.bibliometrics.config.json b/.bibliometrics.config.json index b62ee32..f7768ae 100644 --- a/.bibliometrics.config.json +++ b/.bibliometrics.config.json @@ -1,5 +1,6 @@ { "jsonOutputFile": "bibliometrics.json", + "firstPubYear": 1999, "svgConfig": [ { "background": "#010409", diff --git a/README.md b/README.md index 2901850..e1be055 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ This command line utility does the following: * computes your w-index ([doi:10.1002/asi.21276](https://doi.org/10.1002/asi.21276)), hiding if equal to 0, and provided it is less than 100 ([reason for limitation later](#respect-google-scholars-robotstxt)); * computes your e-index ([doi:10.1371/journal.pone.0005429](https://doi.org/10.1371/journal.pone.0005429)), your r-index ([doi:10.1007/s11434-007-0145-9](https://doi.org/10.1007/s11434-007-0145-9)), and your a-index provided that your h-index is at most 100 ([reason for limitation later](#respect-google-scholars-robotstxt)); * computes your h-median provided that your h-index is less than 200 ([reason for limitation later](#respect-google-scholars-robotstxt)); +* computes your m-quotient, if you configure the year of your first publication in the configuration file; * generates a JSON file summarizing these bibliometrics; * generates one or more SVG images summarizing these bibliometrics; and * includes all bibliometrics that are non-zero by default, but enables user-configurable list of bibliometrics. @@ -59,6 +60,9 @@ The bibliometrics utility computes the following bibliometrics: * Most-cited paper: number of citations to the researcher's most-cited paper. * [h-index](https://doi.org/10.1073/pnas.0507655102): the maximum h such that the researcher's h most-cited papers have been cited at least h times each. +* m-quotient: h-index / n, where n is the number of years since first publication. + The m-quotient was introduced in the same article as the h-index itself as a way + of adjusting for length of publication history. 
* [g-index](https://doi.org/10.1007/s11192-006-0144-7): the maximum g such that the researcher's g most-cited papers have been cited an average of g times each. @@ -105,6 +109,7 @@ Here is a sample of the JSON summary also generated by the utility: "h-median": 48, "i10-index": 33, "i100-index": 3, + "m-quotient": 1.0, "most-cited": 228, "o-index": 75, "r-index": 42.3, @@ -130,10 +135,13 @@ at the root of this repository: [.bibliometrics.config.json](https://github.com/ To generate the JSON summary of your bibliometrics, specify the filename (optionally with path) via the `"jsonOutputFile"` field. If this field is not present, then no JSON file will be generated. +To compute the m-quotient, you must provide the year of your first publication in the `"firstPubYear"` +field. The bibliometrics utility does not attempt to scrape this from your Scholar profile. + To change the order that the bibliometrics appear in the SVG, or to explicitly exclude one or more bibliometrics, you can use the `"include"` field. This field is an array of keys associated with the various bibliometrics. If this field is not present, then the following default order is -used: `[ "total-cites", "five-year-cites", "most-cited", "h-index", "g-index", "i10-index", "i100-index", "i1000-index", "i10000-index", "w-index", "o-index", "h-median", "e-index", "r-index", "a-index" ]`. There is no +used: `[ "total-cites", "five-year-cites", "most-cited", "h-index", "g-index", "i10-index", "i100-index", "i1000-index", "i10000-index", "w-index", "o-index", "h-median", "m-quotient", "e-index", "r-index", "a-index" ]`. There is no reason to use this field if the only thing you want to do is to exclude bibliometrics that have the value 0. Such bibliometrics will be excluded by default. The list of keys for the bibliometrics to include is case-insensitive. @@ -153,11 +161,13 @@ channel, `#123`. You can also use SVG named colors, such as `white`, as well as `rgba(56,139,253,0.4)`. 
If it is valid as a color in SVG, then it should work. The utility simply inserts it for the relevant color within the SVG without validation. -Here is a sample `.bibliometrics.config.json` (using the default order of the bibliometrics): +Here is a sample `.bibliometrics.config.json` (using the default order of the bibliometrics, +and providing the year of first publication): ```JSON { "jsonOutputFile": "bibliometrics.json", + "firstPubYear": 1999, "svgConfig": [ { "background": "#010409", @@ -177,9 +187,11 @@ Here is a sample `.bibliometrics.config.json` (using the default order of the bi } ``` -Here is another sample that generates three SVGs, overriding the default order at the top-level to exclude the -i10-index (and related indexes), and then overriding it again for one of the three SVGs to additionally exclude -the g-index, w-index, o-index, h-median, e-index, r-index, and a-index: +Here is another sample that generates three SVGs, overriding the default order at the top-level +to exclude the i10-index (and related indexes), and then overriding it again for one of the +three SVGs to additionally exclude the g-index, w-index, o-index, h-median, e-index, r-index, +and a-index. 
This example also does not specify "firstPubYear", which means that the m-quotient +won't be calculated: ```JSON { diff --git a/bibliometrics.json b/bibliometrics.json index fddb64e..0957f5c 100644 --- a/bibliometrics.json +++ b/bibliometrics.json @@ -7,6 +7,7 @@ "h-median": 48, "i10-index": 33, "i100-index": 3, + "m-quotient": 1.0, "most-cited": 228, "o-index": 75, "r-index": 42.3, diff --git a/images/bibliometrics.svg b/images/bibliometrics.svg index 25c5e06..1081522 100644 --- a/images/bibliometrics.svg +++ b/images/bibliometrics.svg @@ -1 +1 @@ -BibliometricsTotal citations2052Five-year citations364Most-cited paper228h-index25g-index44i10-index33i100-index3w-index8o-index75h-median48e-index34.12r-index42.30a-index71.56Last updated: 05 March 2024 \ No newline at end of file +BibliometricsTotal citations2052Five-year citations364Most-cited paper228h-index25g-index44i10-index33i100-index3w-index8o-index75h-median48m-quotient1.00e-index34.12r-index42.30a-index71.56Last updated: 05 March 2024 \ No newline at end of file diff --git a/images/bibliometrics2.svg b/images/bibliometrics2.svg index b23421e..92dfee7 100644 --- a/images/bibliometrics2.svg +++ b/images/bibliometrics2.svg @@ -1 +1 @@ -BibliometricsTotal citations2052Five-year citations364Most-cited paper228h-index25g-index44i10-index33i100-index3w-index8o-index75h-median48e-index34.12r-index42.30a-index71.56Last updated: 05 March 2024 \ No newline at end of file +BibliometricsTotal citations2052Five-year citations364Most-cited paper228h-index25g-index44i10-index33i100-index3w-index8o-index75h-median48m-quotient1.00e-index34.12r-index42.30a-index71.56Last updated: 05 March 2024 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 8e88615..a2e7a29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ keywords = [ "i1000-index", "i10000-index", "journals", + "m-quotient", "o-index", "publications", "researcher", diff --git a/src/bibliometrics/bibliometrics.py 
b/src/bibliometrics/bibliometrics.py index 0803664..46ae42f 100755 --- a/src/bibliometrics/bibliometrics.py +++ b/src/bibliometrics/bibliometrics.py @@ -90,6 +90,7 @@ def generateBibliometricsImage(metrics, colors, titleText, stats) : "five-year-cites" : "Five-year citations", "most-cited" : "Most-cited paper", "h-index" : "h-index", + "m-quotient" : "m-quotient", "g-index" : "g-index", "i10-index" : "i10-index", "i100-index" : "i100-index", @@ -258,15 +259,19 @@ def scrapePage(page) : metrics["i10-index"] = int(i10.strip()) return metrics -def parseBibliometrics(page) : +def parseBibliometrics(page, year) : """Parses a Scholar Profile for the bibliometrics. Keyword arguments: page - The user profile page + year - The year of the first publication, which will be None if user + didn't provide in the configuration (i.e., this is not scraped + from profile) """ calc = BibliometricCalculator( scrapePage(page), - parse_cites_per_pub(page) + parse_cites_per_pub(page), + year ) metrics = calc.to_dict() validateMetrics(metrics) @@ -446,7 +451,10 @@ def main() : page = getScholarProfilePage(scholarID) - metrics = parseBibliometrics(page) + metrics = parseBibliometrics( + page, + configuration["firstPubYear"] if "firstPubYear" in configuration else None + ) # default metrics in default order stats = [ @@ -462,6 +470,7 @@ def main() : "w-index", "o-index", "h-median", + "m-quotient", "e-index", "r-index", "a-index" diff --git a/src/bibliometrics/calculator.py b/src/bibliometrics/calculator.py index 0ed81ba..41f5bc7 100644 --- a/src/bibliometrics/calculator.py +++ b/src/bibliometrics/calculator.py @@ -25,21 +25,28 @@ # import math +from datetime import datetime class BibliometricCalculator: """Calculates the various bibliometrics.""" __slots__ = [ '_metrics' ] - def __init__(self, metrics, cites_list): + def __init__(self, metrics, cites_list, year): """Initializes the BibliometricCalculator. 
Keyword arguments: metrics - a dict of the metrics scraped directly from Scholar profile cites_list - a list of the citations of articles scraped from profile + year - The year of the first publication, which will be None if user + didn't provide in the configuration (i.e., this is not scraped + from profile) """ self._metrics = dict(metrics) - if "h-index" not in self._metrics or len(cites_list) == 0: + if "h-index" not in self._metrics: + return + self._calulate_m_quotient(year) + if len(cites_list) == 0: return sorted_cites = sorted(cites_list, reverse=True) if sorted_cites[0] <= 0: @@ -185,3 +192,16 @@ def _calculate_w_index(self, sorted_cites): if w > 0 and w < 100: self._metrics["w-index"] = w + def _calulate_m_quotient(self, year): + """Calculates the m-quotient if the user provided the year of + first publication in the configuration. + + Keyword arguments: + year - the year of first publication or None if not provided + """ + if year: + n = datetime.now().year - year + m = self._metrics["h-index"] / n if n > 0 else 0 + if m > 0: + self._metrics["m-quotient"] = "{0:.2f}".format(m) + diff --git a/tests/tests.py b/tests/tests.py index 3c386c2..2321b81 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -27,6 +27,7 @@ import unittest import sys, math +from datetime import datetime sys.path.insert(0,'src') import bibliometrics.bibliometrics as bib from bibliometrics.calculator import BibliometricCalculator @@ -37,6 +38,22 @@ class TestBibiometrics(unittest.TestCase) : # change this to True. 
printSampleImage = False + def test_calculator_m_quotient(self): + metrics = { + "total-cites" : 42, + "five-year-cites" : 6, + "h-index" : 24, + "i10-index" : 1 + } + elapsed_years = [ 1, 2, 4, 8, 16] + expected = ["24.00", "12.00", "6.00", "3.00", "1.50"] + for n, e in zip(elapsed_years, expected): + year = datetime.now().year - n + calc = BibliometricCalculator(metrics, [], year) + self.assertEqual(e, calc._metrics["m-quotient"]) + calc = BibliometricCalculator(metrics, [], datetime.now().year) + self.assertFalse("m-quotient" in calc._metrics) + def test_calculator_retains_scraped(self): metrics = { "total-cites" : 42, @@ -44,9 +61,9 @@ def test_calculator_retains_scraped(self): "h-index" : 3, "i10-index" : 1 } - calc = BibliometricCalculator(metrics, []) + calc = BibliometricCalculator(metrics, [], None) self.assertEqual(metrics, calc._metrics) - calc = BibliometricCalculator(metrics, [5, 20, 9]) + calc = BibliometricCalculator(metrics, [5, 20, 9], None) for key, value in metrics.items(): self.assertEqual(value, calc._metrics[key]) @@ -57,7 +74,7 @@ def test_calculate_most(self): "h-index" : 3, "i10-index" : 1 } - calc = BibliometricCalculator(metrics, [5, 20, 9]) + calc = BibliometricCalculator(metrics, [5, 20, 9], None) self.assertEqual(20, calc._metrics["most-cited"]) def test_calculate_w_index(self): @@ -68,7 +85,7 @@ def test_calculate_w_index(self): "i10-index" : 1 } cites = [100, 90, 80, 70, 60, 50, 40, 30, 20, 10, 5, 4, 3, 2, 1 ] - calc = BibliometricCalculator(metrics, cites) + calc = BibliometricCalculator(metrics, cites, None) self.assertEqual(5, calc._metrics["w-index"]) def test_calculate_o_index(self): @@ -78,13 +95,21 @@ def test_calculate_o_index(self): "h-index" : 42, "i10-index" : 1 } - self.assertEqual(42, BibliometricCalculator(metrics, [42]*42)._metrics["o-index"]) + self.assertEqual( + 42, + BibliometricCalculator(metrics, [42]*42, None)._metrics["o-index"]) metrics["h-index"] = 1 - self.assertEqual(8, BibliometricCalculator(metrics, [5, 
20, 64])._metrics["o-index"]) + self.assertEqual( + 8, + BibliometricCalculator(metrics, [5, 20, 64], None)._metrics["o-index"]) metrics["h-index"] = 2 - self.assertEqual(8, BibliometricCalculator(metrics, [5, 20, 32])._metrics["o-index"]) + self.assertEqual( + 8, + BibliometricCalculator(metrics, [5, 20, 32], None)._metrics["o-index"]) metrics["h-index"] = 0 - self.assertEqual(0, BibliometricCalculator(metrics, [1, 0, 2])._metrics["o-index"]) + self.assertEqual( + 0, + BibliometricCalculator(metrics, [1, 0, 2], None)._metrics["o-index"]) def test_calculate_g(self) : metrics = { @@ -95,10 +120,14 @@ def test_calculate_g(self) : } for g in range(1, 11) : cites = [10]*g - self.assertEqual(g, BibliometricCalculator(metrics, cites)._metrics["g-index"]) + self.assertEqual( + g, + BibliometricCalculator(metrics, cites, None)._metrics["g-index"]) for g in range(11, 21) : cites = [10]*g - self.assertEqual(10, BibliometricCalculator(metrics, cites)._metrics["g-index"]) + self.assertEqual( + 10, + BibliometricCalculator(metrics, cites, None)._metrics["g-index"]) def test_calculate_h_median(self): metrics = { @@ -114,13 +143,18 @@ def test_calculate_h_median(self): for i in range(len(expected)): metrics["h-index"] = i + 1 if expected[i] > 0: - h_median = BibliometricCalculator(metrics, cites)._metrics["h-median"] + h_median = BibliometricCalculator( + metrics, + cites, + None)._metrics["h-median"] self.assertEqual( expected[i], float(h_median) if isinstance(h_median, str) else h_median ) else: - self.assertFalse("h-median" in BibliometricCalculator(metrics, cites)._metrics) + self.assertFalse( + "h-median" in BibliometricCalculator( + metrics, cites, None)._metrics) def test_calculate_h_core_citations(self): metrics = { @@ -135,7 +169,10 @@ def test_calculate_h_core_citations(self): expected = 10 * (55 - (10-h)*(11-h)//2) self.assertEqual( expected, - BibliometricCalculator(metrics, cites)._calculate_h_core_citations(cites) + BibliometricCalculator( + metrics, + cites, + 
None)._calculate_h_core_citations(cites) ) def test_calculate_e_no_excess(self): @@ -148,7 +185,11 @@ def test_calculate_e_no_excess(self): for h in range(0, 10) : metrics["h-index"] = h cites = [h]*20 - self.assertFalse("e-index" in BibliometricCalculator(metrics, cites)._metrics) + self.assertFalse( + "e-index" in BibliometricCalculator( + metrics, + cites, + None)._metrics) def test_calculate_e_equal_excess(self) : metrics = { @@ -162,7 +203,8 @@ def test_calculate_e_equal_excess(self) : cites = [h+5]*h + [1]*5 self.assertAlmostEqual( math.sqrt(5*h), - float(BibliometricCalculator(metrics, cites)._metrics["e-index"]), + float(BibliometricCalculator( + metrics, cites, None)._metrics["e-index"]), places=2 ) @@ -178,7 +220,8 @@ def test_calculate_e_unequal_excess(self) : cites = [h+x for x in range(h, 0, -1)] self.assertAlmostEqual( math.sqrt(h*(h+1)/2), - float(BibliometricCalculator(metrics, cites)._metrics["e-index"]), + float(BibliometricCalculator( + metrics, cites, None)._metrics["e-index"]), places=2 ) @@ -196,7 +239,8 @@ def test_calculate_R_index(self): cites = [h_core_sum] + [0]*5 self.assertEqual( expected, - float(BibliometricCalculator(metrics, cites)._metrics["r-index"]) + float(BibliometricCalculator( + metrics, cites, None)._metrics["r-index"]) ) def test_calculate_A_index(self): @@ -214,7 +258,8 @@ def test_calculate_A_index(self): cites = [h_core_sum // h]*h + [0]*5 self.assertEqual( expected, - float(BibliometricCalculator(metrics, cites)._metrics["a-index"]) + float(BibliometricCalculator( + metrics, cites, None)._metrics["a-index"]) ) def test_calculate_ixx_index(self): @@ -225,7 +270,7 @@ def test_calculate_ixx_index(self): "i10-index" : 1 } cites = [10001, 10000, 1002, 1001, 1000, 103, 102, 101, 100, 5, 4, 3, 2, 1] - calc = BibliometricCalculator(metrics, cites) + calc = BibliometricCalculator(metrics, cites, None) self.assertEqual(2, calc._metrics["i10000-index"]) self.assertEqual(5, calc._metrics["i1000-index"]) self.assertEqual(9, 
calc._metrics["i100-index"]) @@ -233,7 +278,24 @@ def test_calculate_ixx_index(self): def test_parse(self) : with open("tests/testcase.html.txt", "r") as f : page = f.read().replace('\n', '') - metrics = bib.parseBibliometrics(page) + metrics = bib.parseBibliometrics(page, None) + self.assertEqual(2052, metrics["total-cites"]) + self.assertEqual(364, metrics["five-year-cites"]) + self.assertEqual(25, metrics["h-index"]) + self.assertEqual(33, metrics["i10-index"]) + self.assertEqual(44, metrics["g-index"]) + self.assertEqual(228, metrics["most-cited"]) + self.assertEqual(3, metrics["i100-index"]) + self.assertEqual(75, metrics["o-index"]) + self.assertEqual(48, metrics["h-median"]) + self.assertEqual(8, metrics["w-index"]) + self.assertEqual("34.12", metrics["e-index"]) + self.assertEqual("42.30", metrics["r-index"]) + self.assertEqual("71.56", metrics["a-index"]) + self.assertFalse("i1000-index" in metrics) + self.assertFalse("i10000-index" in metrics) + self.assertFalse("m-quotient" in metrics) + metrics = bib.parseBibliometrics(page, datetime.now().year - 25) self.assertEqual(2052, metrics["total-cites"]) self.assertEqual(364, metrics["five-year-cites"]) self.assertEqual(25, metrics["h-index"]) @@ -244,6 +306,7 @@ def test_parse(self) : self.assertEqual(75, metrics["o-index"]) self.assertEqual(48, metrics["h-median"]) self.assertEqual(8, metrics["w-index"]) + self.assertEqual("1.00", metrics["m-quotient"]) self.assertEqual("34.12", metrics["e-index"]) self.assertEqual("42.30", metrics["r-index"]) self.assertEqual("71.56", metrics["a-index"]) @@ -264,7 +327,8 @@ def test_generate_image(self) : "h-median" : 48, "e-index" : "34.12", "r-index" : "42.30", - "a-index" : "71.56" + "a-index" : "71.56", + "m-quotient" : "1.00" } stats = [ "total-cites", @@ -279,6 +343,7 @@ def test_generate_image(self) : "w-index", "o-index", "h-median", + "m-quotient", "e-index", "r-index", "a-index"