Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scrape Salaries of a Team's Players for a Season #185

Open
wants to merge 7 commits into
base: v4
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,14 @@ from basketball_reference_web_scraper.data import Team

## API

This client has seven methods
This client has eight methods
* Getting player box scores by a date (`client.player_box_scores`)
* Getting team box scores by a date (`client.team_box_scores`)
* Getting the schedule for a season (`client.season_schedule`)
* Getting players totals for a season (`client.players_season_totals`)
* Getting players advanced season statistics for a season (`client.players_advanced_season_totals`)
* Getting regular season box scores for a given player and season (`client.regular_season_player_box_scores`)
* Getting the salaries of players of a team for a season (`client.team_salaries`)
* Searching (`client.search`)

You can see all methods used in [this `repl`](https://repl.it/@jaebradley/v300api-examples).
Expand Down Expand Up @@ -179,6 +180,21 @@ The `player_identifier` is Basketball Reference's unique identifier for each pla
his `player_identifier` is `westbru01` (you can see this from his player page URL:
`https://www.basketball-reference.com/players/w/westbru01/gamelog/2020`)

### Get salary data for a team in a particular season

```python
from basketball_reference_web_scraper import client
from basketball_reference_web_scraper.data import Team

# Get salaries of all the players on the 1997-1998 Bulls team
client.team_salaries(
    team=Team.CHICAGO_BULLS,
    season_end_year=1998
)

# The team_salaries method supports all output behavior previously described
```

### Search

```python
Expand Down
24 changes: 23 additions & 1 deletion basketball_reference_web_scraper/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from basketball_reference_web_scraper.writers import CSVWriter, RowFormatter, \
BOX_SCORE_COLUMN_NAMES, SCHEDULE_COLUMN_NAMES, PLAYER_SEASON_TOTALS_COLUMN_NAMES, \
PLAYER_ADVANCED_SEASON_TOTALS_COLUMN_NAMES, TEAM_BOX_SCORES_COLUMN_NAMES, PLAY_BY_PLAY_COLUMN_NAMES, \
PLAYER_SEASON_BOX_SCORE_COLUMN_NAMES, SearchResultsCSVWriter, SEARCH_RESULTS_COLUMN_NAMES
PLAYER_SEASON_BOX_SCORE_COLUMN_NAMES, SearchResultsCSVWriter, SEARCH_RESULTS_COLUMN_NAMES, \
SALARY_COLUMN_NAMES


def player_box_scores(day, month, year, output_type=None, output_file_path=None, output_write_option=None,
Expand Down Expand Up @@ -133,6 +134,27 @@ def players_advanced_season_totals(season_end_year, include_combined_values=Fals
json_options=json_options,
)

def team_salaries(team, season_end_year, output_type=None, output_file_path=None, output_write_option=None,
                  json_options=None):
    """Return the salaries of every player on a team's roster for a season.

    team -- a Team enum member identifying the franchise
    season_end_year -- calendar year the season ended in (e.g. 1998 for 1997-98)
    The remaining parameters control output behavior, mirroring the other
    client methods.

    Raises InvalidSeason when Basketball Reference returns a 404 for the
    team/season page; any other HTTP error propagates unchanged.
    NOTE(review): a 404 can also mean an unknown team abbreviation, so the
    exception name may be slightly misleading — confirm desired behavior.
    """
    try:
        values = HTTPService(parser=ParserService()).team_salaries(team, season_end_year)
    except requests.exceptions.HTTPError as error:
        # Only a missing page maps to InvalidSeason; other failures re-raise.
        if error.response.status_code != requests.codes.not_found:
            raise error
        raise InvalidSeason(season_end_year=season_end_year)
    salary_writer = CSVWriter(
        column_names=SALARY_COLUMN_NAMES,
        row_formatter=RowFormatter(data_field_names=SALARY_COLUMN_NAMES),
    )
    return output(
        values=values,
        output_type=output_type,
        output_file_path=output_file_path,
        output_write_option=output_write_option,
        csv_writer=salary_writer,
        json_options=json_options,
    )

def team_box_scores(day, month, year, output_type=None, output_file_path=None, output_write_option=None,
json_options=None):
Expand Down
37 changes: 37 additions & 0 deletions basketball_reference_web_scraper/html.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import re

from basketball_reference_web_scraper.utilities import extract_html_obj_in_comment


class BasicBoxScoreRow:
def __init__(self, html):
Expand Down Expand Up @@ -1090,3 +1092,38 @@ def totals_table(self):
return PlayerPageTotalsTable(html=totals_tables[0])

return None

class PlayerSalaryRow:
    """A single player's row in a team's salaries table.

    NOTE(review): the xpaths below are absolute ('//td…'), so lxml searches
    the entire parsed fragment rather than just this row element; the stored
    row index then selects the matching cell. This only works because
    TeamSalaryTable.rows removes the header row first and enumerates data
    rows in document order — confirm before reusing this class elsewhere.
    """

    def __init__(self, html, row_index):
        self.html = html
        self.index = row_index

    @property
    def name(self):
        # Player display name taken from the row's "player" cell.
        player_cells = self.html.xpath('//td[@data-stat="player"]')
        return player_cells[self.index].text_content()

    @property
    def salary(self):
        # The "csk" attribute carries the raw numeric salary; the cell's
        # visible text is the formatted (e.g. "$...") representation.
        salary_cells = self.html.xpath('//td[@data-stat="salary"]')
        return salary_cells[self.index].get('csk')

class TeamSalaryTable:
    """Wraps the salaries table embedded in a team's season page."""

    def __init__(self, html):
        self.html = html

    @property
    def rows(self):
        """Return a PlayerSalaryRow for each data row of the salaries table."""
        # basketball-reference does this weird thing where it puts table data in
        # comments on the HTML doc which then gets added to the DOM (I'm guessing)
        # after a certain amount of time. I assume it is an attempt to make scraping
        # more difficult. This is evidenced by the fact that if you attempt to load
        # a page on a team with Javascript disabled you will not be able to see all
        # the tables. To get around this we just read from the comments.
        salary_table = extract_html_obj_in_comment(self.html, '//table[@id="salaries2"]')
        # NOTE(review): extract_html_obj_in_comment returns None when no comment
        # matches — that would raise AttributeError below; confirm whether a
        # clearer error is wanted for pages without a salaries table.
        # Drop the header row so data-row indexes line up with the cell lists
        # that PlayerSalaryRow queries with absolute xpaths.
        header = salary_table.xpath('//tr')[0]
        header.getparent().remove(header)
        return [
            PlayerSalaryRow(html=row_html, row_index=i)
            for i, row_html in enumerate(salary_table.xpath('//tr'))
        ]
18 changes: 17 additions & 1 deletion basketball_reference_web_scraper/http_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from basketball_reference_web_scraper.errors import InvalidDate, InvalidPlayerAndSeason
from basketball_reference_web_scraper.html import DailyLeadersPage, PlayerSeasonBoxScoresPage, PlayerSeasonTotalTable, \
PlayerAdvancedSeasonTotalsTable, PlayByPlayPage, SchedulePage, BoxScoresPage, DailyBoxScoresPage, SearchPage, \
PlayerPage
PlayerPage, TeamSalaryTable



class HTTPService:
Expand Down Expand Up @@ -98,6 +99,21 @@ def players_season_totals(self, season_end_year):
table = PlayerSeasonTotalTable(html=html.fromstring(response.content))
return self.parser.parse_player_season_totals(totals=table.rows)

def team_salaries(self, team, season_end_year):
    """Fetch a team's season page and parse its salaries table.

    Raises requests.exceptions.HTTPError (via raise_for_status) when the
    page cannot be retrieved, e.g. for an unknown team/season combination.
    """
    page_url = '{BASE_URL}/teams/{team_abbr}/{end_year}.html'.format(
        BASE_URL=HTTPService.BASE_URL,
        team_abbr=TEAM_TO_TEAM_ABBREVIATION[team],
        end_year=season_end_year,
    )
    response = requests.get(url=page_url)
    response.raise_for_status()
    salary_table = TeamSalaryTable(html=html.fromstring(response.content))
    return self.parser.parse_team_salary(player_salaries=salary_table.rows)

def schedule_for_month(self, url):
response = requests.get(url=url)

Expand Down
8 changes: 6 additions & 2 deletions basketball_reference_web_scraper/parser_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
SecondsPlayedParser, PlayerBoxScoresParser, PlayerAdvancedSeasonTotalsParser, PeriodDetailsParser, \
PeriodTimestampParser, ScoresParser, PlayByPlaysParser, TeamNameParser, ScheduledStartTimeParser, \
ScheduledGamesParser, PlayerBoxScoreOutcomeParser, PlayerSeasonBoxScoresParser, SearchResultNameParser, \
ResourceLocationParser, SearchResultsParser, LeagueAbbreviationParser, PlayerDataParser
ResourceLocationParser, SearchResultsParser, LeagueAbbreviationParser, PlayerDataParser, TeamSalaryParser


class ParserService:
Expand Down Expand Up @@ -76,6 +76,7 @@ def __init__(self):
league_abbreviation_parser=self.league_abbreviation_parser,
)
self.team_totals_parser = TeamTotalsParser(team_abbreviation_parser=self.team_abbreviation_parser)
self.team_salary_parser = TeamSalaryParser()

def parse_play_by_plays(self, play_by_plays, away_team_name, home_team_name):
return self.play_by_plays_parser.parse(
Expand Down Expand Up @@ -106,4 +107,7 @@ def parse_player_search_results(self, nba_aba_baa_players):
return self.search_results_parser.parse(nba_aba_baa_players=nba_aba_baa_players)

def parse_player_data(self, player):
return self.player_data_parser.parse(player=player)
return self.player_data_parser.parse(player=player)

def parse_team_salary(self, player_salaries):
return self.team_salary_parser.parse(player_salaries=player_salaries)
4 changes: 4 additions & 0 deletions basketball_reference_web_scraper/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,3 +552,7 @@ def parse(self, player):
)
)
}

class TeamSalaryParser:
    """Converts scraped salary table rows into plain dictionaries."""

    def parse(self, player_salaries):
        """Return one {'name', 'salary'} dict per row, salary coerced to int."""
        return [
            {'name': entry.name, 'salary': int(entry.salary)}
            for entry in player_salaries
        ]
9 changes: 9 additions & 0 deletions basketball_reference_web_scraper/utilities.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from lxml import etree, html

def str_to_int(value, default=int(0)):
stripped_value = value.strip()
try:
Expand All @@ -18,3 +20,10 @@ def merge_two_dicts(first, second):
combined = first.copy()
combined.update(second)
return combined

def extract_html_obj_in_comment(html_tree, xpath):
    """Search HTML comments for markup matching ``xpath``.

    Basketball Reference hides some tables inside HTML comments; this walks
    every comment node, parses its text as HTML, and returns the first parsed
    fragment containing a match for ``xpath``. Returns None when no comment
    matches.
    """
    for node in html_tree.iter(etree.Comment):
        comment = node.text
        # Empty/whitespace-only comments (and unparseable ones, e.g. IE
        # conditional comments) would make html.fromstring raise — skip them.
        if not comment or not comment.strip():
            continue
        try:
            extracted_html = html.fromstring(comment)
        except etree.ParserError:
            continue
        if extracted_html.xpath(xpath):
            return extracted_html
    return None
8 changes: 6 additions & 2 deletions basketball_reference_web_scraper/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
# without doing it this way

SHARED_COLUMN_NAMES = [
"team",
"location",
"team", "location",
"opponent",
"outcome",
"seconds_played",
Expand Down Expand Up @@ -130,6 +129,11 @@
"leagues",
]

# Column headers for CSV output of client.team_salaries.
SALARY_COLUMN_NAMES = ["name", "salary"]


class WriteOptions:
def __init__(self, file_path=None, mode=None, custom_options=None):
Expand Down