The problem is that your json_data string starts with '{ instead of {.
The opening marker "('" is two characters long, so the start index you want is
one position further along, at the {. In other words, you need to add 2, not 1,
to the index of "('":
index_start = strings.index("('")+2
instead of index_start = strings.index("('")+1
#Import packages and modules
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
# Scrape the shot data of a single match from its match page and parse it
# into Python objects.

# Set up the URL of the match to scrape.
base_url = "https://understat.com/match/"
# input() already returns a str; strip stray whitespace so it cannot leak
# into the URL. Example id: 14628
match_id = input('Please enter the match id: ').strip()
url = base_url + match_id
print(url)

# Fetch the page. The timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() stops on an HTTP error instead
# of trying to parse an error page.
res = requests.get(url, timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.content, 'lxml')
scripts = soup.find_all('script')
print(scripts)

#only the shot data
# NOTE(review): assumes the second <script> tag is the one carrying the shot
# data -- this depends on the page layout; confirm if the site changes.
strings = scripts[1].string
print(strings)

# The payload sits between the markers "('" and "')". The opening marker is
# two characters long, hence +2: the slice must start at the payload itself,
# not at the quote character.
#strip symbols so we only have the json data
index_start = strings.index("('")+2 # <--- CHANGED HERE
index_end = strings.index("')")
json_data = strings[index_start:index_end]
# Turn the backslash escape sequences in the payload into real characters.
# Encoding with latin-1 + backslashreplace (rather than utf8) keeps any
# literal non-ASCII character intact: unicode_escape reads raw bytes as
# latin-1, so utf8-encoded bytes would come back as mojibake.
json_data = json_data.encode('latin-1', 'backslashreplace').decode('unicode_escape')

#convert string to json format
data = json.loads(json_data)
print(data)
Output:
{'h': [{'id': '401340', 'minute': '10', 'result': 'MissedShots', 'X': '0.8390000152587891', 'Y': '0.5379999923706055', 'xG': '0.08287161588668823', 'player': 'Marcus Rashford', 'h_a': 'h', 'player_id': '556', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'RightFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'Aaron Wan-Bissaka', 'lastAction': 'Pass'}, {'id': '401342', 'minute': '16', 'result': 'BlockedShot', 'X': '0.9230000305175782', 'Y': '0.705999984741211', 'xG': '0.04452449828386307', 'player': 'Mason Greenwood', 'h_a': 'h', 'player_id': '7490', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'LeftFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'Bruno Fernandes', 'lastAction': 'Chipped'}, {'id': '401344', 'minute': '26', 'result': 'SavedShot', 'X': '0.74', 'Y': '0.37900001525878907', 'xG': '0.019290726631879807', 'player': 'Mason Greenwood', 'h_a': 'h', 'player_id': '7490', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'LeftFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'Aaron Wan-Bissaka', 'lastAction': 'Pass'}, {'id': '401345', 'minute': '27', 'result': 'BlockedShot', 'X': '0.8930000305175781', 'Y': '0.34900001525878904', 'xG': '0.07055725157260895', 'player': 'Mason Greenwood', 'h_a': 'h', 'player_id': '7490', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'RightFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'Aaron Wan-Bissaka', 'lastAction': 'Pass'}, {'id': '401346', 'minute': '29', 'result': 'MissedShots', 'X': '0.919000015258789', 'Y': 
'0.46599998474121096', 'xG': '0.04169069603085518', 'player': 'Harry Maguire', 'h_a': 'h', 'player_id': '1687', 'situation': 'FromCorner', 'season': '2020', 'shotType': 'Head', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': None, 'lastAction': 'Foul'}, {'id': '401347', 'minute': '38', 'result': 'BlockedShot', 'X': '0.7169999694824218', 'Y': '0.4370000076293945', 'xG': '0.016049593687057495', 'player': 'Bruno Fernandes', 'h_a': 'h', 'player_id': '1228', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'RightFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'Paul Pogba', 'lastAction': 'Pass'}, {'id': '401348', 'minute': '40', 'result': 'SavedShot', 'X': '0.9330000305175781', 'Y': '0.485', 'xG': '0.0721256285905838', 'player': 'Anthony Martial', 'h_a': 'h', 'player_id': '553', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'Head', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'Bruno Fernandes', 'lastAction': 'Pass'}, {'id': '401349', 'minute': '48', 'result': 'MissedShots', 'X': '0.925', 'Y': '0.35700000762939454', 'xG': '0.27813461422920227', 'player': 'Mason Greenwood', 'h_a': 'h', 'player_id': '7490', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'RightFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'Bruno Fernandes', 'lastAction': 'Pass'}, {'id': '401350', 'minute': '51', 'result': 'BlockedShot', 'X': '0.769000015258789', 'Y': '0.35200000762939454', 'xG': '0.019760465249419212', 'player': 'Aaron Wan-Bissaka', 'h_a': 'h', 'player_id': '5584', 'situation': 'OpenPlay', 
'season': '2020', 'shotType': 'RightFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': None, 'lastAction': 'None'}, {'id': '401352', 'minute': '61', 'result': 'SavedShot', 'X': '0.6830000305175781', 'Y': '0.5070000076293946', 'xG': '0.032827332615852356', 'player': 'Marcus Rashford', 'h_a': 'h', 'player_id': '556', 'situation': 'DirectFreekick', 'season': '2020', 'shotType': 'RightFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': None, 'lastAction': 'Standard'}, {'id': '401353', 'minute': '63', 'result': 'BlockedShot', 'X': '0.77', 'Y': '0.6519999694824219', 'xG': '0.061657458543777466', 'player': 'Bruno Fernandes', 'h_a': 'h', 'player_id': '1228', 'situation': 'DirectFreekick', 'season': '2020', 'shotType': 'RightFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': None, 'lastAction': 'Standard'}, {'id': '401354', 'minute': '63', 'result': 'Goal', 'X': '0.9180000305175782', 'Y': '0.51', 'xG': '0.04361902177333832', 'player': 'Harry Maguire', 'h_a': 'h', 'player_id': '1687', 'situation': 'FromCorner', 'season': '2020', 'shotType': 'Head', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'Alex Telles', 'lastAction': 'Aerial'}, {'id': '401357', 'minute': '76', 'result': 'BlockedShot', 'X': '0.7609999847412109', 'Y': '0.6780000305175782', 'xG': '0.016254646703600883', 'player': 'Alex Telles', 'h_a': 'h', 'player_id': '1828', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'LeftFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 
'date': '2021-01-27 20:15:00', 'player_assisted': None, 'lastAction': 'None'}, {'id': '401358', 'minute': '76', 'result': 'MissedShots', 'X': '0.924000015258789', 'Y': '0.5070000076293946', 'xG': '0.10926949232816696', 'player': 'Paul Pogba', 'h_a': 'h', 'player_id': '1740', 'situation': 'FromCorner', 'season': '2020', 'shotType': 'Head', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'Alex Telles', 'lastAction': 'Cross'}, {'id': '401359', 'minute': '81', 'result': 'MissedShots', 'X': '0.8630000305175781', 'Y': '0.5570000076293945', 'xG': '0.06367684155702591', 'player': 'Edinson Cavani', 'h_a': 'h', 'player_id': '3294', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'LeftFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': None, 'lastAction': 'None'}, {'id': '401360', 'minute': '85', 'result': 'BlockedShot', 'X': '0.86', 'Y': '0.775', 'xG': '0.02079056017100811', 'player': 'Bruno Fernandes', 'h_a': 'h', 'player_id': '1228', 'situation': 'FromCorner', 'season': '2020', 'shotType': 'RightFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'Luke Shaw', 'lastAction': 'Pass'}], 'a': [{'id': '401341', 'minute': '15', 'result': 'SavedShot', 'X': '0.8690000152587891', 'Y': '0.5479999923706055', 'xG': '0.12690183520317078', 'player': 'Billy Sharp', 'h_a': 'a', 'player_id': '7712', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'LeftFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'John Fleck', 'lastAction': 'Throughball'}, {'id': '401343', 'minute': '22', 'result': 'Goal', 'X': '0.98', 'Y': 
'0.49400001525878906', 'xG': '0.15722613036632538', 'player': 'Kean Bryan', 'h_a': 'a', 'player_id': '9163', 'situation': 'FromCorner', 'season': '2020', 'shotType': 'Head', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'John Fleck', 'lastAction': 'Cross'}, {'id': '401351', 'minute': '55', 'result': 'MissedShots', 'X': '0.8430000305175781', 'Y': '0.29600000381469727', 'xG': '0.0439084991812706', 'player': 'David McGoldrick', 'h_a': 'a', 'player_id': '7711', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'RightFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'George Baldock', 'lastAction': 'Pass'}, {'id': '401355', 'minute': '73', 'result': 'BlockedShot', 'X': '0.875', 'Y': '0.3609999847412109', 'xG': '0.07747054845094681', 'player': 'Oliver Burke', 'h_a': 'a', 'player_id': '5256', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'RightFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'John Lundstram', 'lastAction': 'Pass'}, {'id': '401356', 'minute': '73', 'result': 'Goal', 'X': '0.8769999694824219', 'Y': '0.3079999923706055', 'xG': '0.28404054045677185', 'player': 'Oliver Burke', 'h_a': 'a', 'player_id': '5256', 'situation': 'OpenPlay', 'season': '2020', 'shotType': 'RightFoot', 'match_id': '14628', 'h_team': 'Manchester United', 'a_team': 'Sheffield United', 'h_goals': '1', 'a_goals': '2', 'date': '2021-01-27 20:15:00', 'player_assisted': 'John Lundstram', 'lastAction': 'Pass'}]}
Also you could replace:
#strip symbols so we only have the json data
index_start = strings.index("('")+2 # <--- CHANGED HERE
index_end = strings.index("')")
json_data = strings[index_start:index_end]
with:
#strip symbols so we only have the json data
# Keep what follows the first "('" and cut it at the first "')" after that,
# which mirrors the index()-based slicing this one-liner replaces.
json_data = strings.split("('", 1)[-1].split("')", 1)[0]