Skip to content

Commit

Permalink
Add tree of science to preprocess (#23)
Browse files Browse the repository at this point in the history
  • Loading branch information
odarbelaeze authored Sep 11, 2024
1 parent 608eb05 commit 4ea9b15
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,4 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
*.xlsx
60 changes: 60 additions & 0 deletions src/bibx/algorithms/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

from bibx import Collection

from .sap import BRANCH, LEAF, ROOT, TRUNK, Sap


class Preprocess:
def __init__(self, wos: Collection, scopus: Collection) -> None:
Expand Down Expand Up @@ -150,6 +152,61 @@ def write_times_cited_information(self, workseet: Worksheet) -> None:
workseet.write(i, 1, times_cited)
workseet.write(i, 2, times_cited / total)

@staticmethod
def _get_tos(data: dict) -> str:
    """Return the Tree of Science label for a node's attribute mapping.

    The attributes are checked in a fixed priority order -- ROOT, TRUNK,
    LEAF, then BRANCH -- and the first one whose value is greater than
    zero decides the label.

    Args:
        data: Node attributes; must contain the ROOT, TRUNK, LEAF and
            BRANCH keys (a missing key raises ``KeyError``).

    Returns:
        ``"Root"``, ``"Trunk"``, ``"Leaf"``, ``"Branch <value>"`` (where
        ``<value>`` is ``data[BRANCH]``), or ``"_"`` when none of the
        four values is positive.
    """
    # Order matters: the first positive attribute wins.
    fixed_labels = ((ROOT, "Root"), (TRUNK, "Trunk"), (LEAF, "Leaf"))
    for key, label in fixed_labels:
        if data[key] > 0:
            return label

    branch = data[BRANCH]
    if branch > 0:
        return f"Branch {branch}"
    return "_"

def write_tree_of_science_information(self, workseet: Worksheet) -> None:
    """Write the "Tree of Science" table into the given worksheet.

    A graph is built from ``self.merged`` by chaining ``Sap.create_graph``,
    ``Sap.clean_graph`` and ``Sap.tree``. Row 0 receives the column
    headers; every node whose ``_get_tos`` label is not ``"_"`` is then
    written on consecutive rows starting at row 1.

    Rows are ordered by the label *string*, so the order is lexicographic
    ("Branch ..." < "Leaf" < "Root" < "Trunk"); nodes sharing a label keep
    the order in which the graph yields them.

    Args:
        workseet: Worksheet that receives the cells via ``write(row,
            col, value)``.
    """
    sap = Sap()
    # NOTE(review): presumably ``tree`` is what annotates nodes with the
    # ROOT/TRUNK/LEAF/BRANCH attributes read by ``_get_tos`` -- confirm in
    # the ``sap`` module.
    graph = sap.tree(sap.clean_graph(sap.create_graph(self.merged)))

    headers = (
        "TOS",
        "Label",
        "Authors",
        "Year",
        "Title",
        "Journal",
        "Volume",
        "Issue",
        "Page",
        "DOI",
        "Times Cited",
    )
    for column, header in enumerate(headers):
        workseet.write(0, column, header)

    # Classify every node once, then order by the label text. ``list.sort``
    # is stable, so ties keep the graph's iteration order.
    classified = [
        (self._get_tos(attrs), label, attrs)
        for label, attrs in graph.nodes(data=True)
    ]
    classified.sort(key=lambda entry: entry[0])

    # Attribute keys for columns 3..10, in column order.
    plain_fields = (
        "year",
        "title",
        "journal",
        "volume",
        "issue",
        "page",
        "doi",
        "times_cited",
    )
    # "_" marks nodes outside the tree; they get no row.
    selected = [entry for entry in classified if entry[0] != "_"]
    for row, (tos, label, attrs) in enumerate(selected, start=1):
        workseet.write(row, 0, tos)
        workseet.write(row, 1, label)
        workseet.write(row, 2, "; ".join(attrs["authors"]))
        for column, field in enumerate(plain_fields, start=3):
            workseet.write(row, column, attrs[field])

def create_workbook(self, filename: str) -> None:
workbook = Workbook(filename)
self.write_merged_information(workbook.add_worksheet("Merged"))
Expand All @@ -159,4 +216,7 @@ def create_workbook(self, filename: str) -> None:
self.write_journal_information(workbook.add_worksheet("Journals"))
self.write_author_information(workbook.add_worksheet("Authors"))
self.write_times_cited_information(workbook.add_worksheet("Times Cited"))
self.write_tree_of_science_information(
workbook.add_worksheet("Tree of Science")
)
workbook.close()
8 changes: 4 additions & 4 deletions stubs/networkx/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from typing import Any, Dict, Iterable, Iterator, List
from typing import Any, Iterable, Iterator, List

from _typeshed import Incomplete, Self
from _typeshed import Self
from networkx.classes.reportviews import NodeView

__version__: str

class Graph:
nodes: NodeView
def subgraph(self: Self, nodes: Iterable) -> Self: ...
def copy(self: Self) -> Self: ...
@property
def nodes(self) -> Dict[str, Incomplete]: ...
def add_node(self, node: str, **kwargs) -> None: ...
def add_edge(self, u: str, v: str, **kwargs) -> None: ...
def add_edges_from(self, edges: Iterable) -> None: ...
Expand Down

0 comments on commit 4ea9b15

Please sign in to comment.