{corona-virus-intro}
This is post will help you to export Statsbomb free data to BigQuery. Before you even look at the first line of code go to Statsbomb Resource Center and sign StatsBomb Public Data User Agreement. If you don’t do that, feds will do a sweep.
Classic
from google.cloud import bigquery
import os
import requests
import json
from io import StringIO
This is some longass cell but schema is quite important here. First of all you can see how well BQ handles JSONs and what’s actually more important, autogenerated schema is not really reliable. Also few useful functions to make working with BQ easier.
SCHEMA =[
{
"mode": "NULLABLE",
"name": "competition_id",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "season_id",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "country_name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "competition_name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "season_name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "match_id",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "match_date",
"type": "DATE"
},
{
"mode": "REPEATED",
"name": "location",
"type": "FLOAT"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "position",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "player",
"type": "RECORD"
},
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "body_part",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "type",
"type": "RECORD"
},
{
"mode": "REPEATED",
"name": "end_location",
"type": "FLOAT"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "outcome",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "height",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "angle",
"type": "FLOAT"
},
{
"mode": "NULLABLE",
"name": "length",
"type": "FLOAT"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "recipient",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "switch",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "aerial_won",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "cross",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "deflected",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "assisted_shot_id",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "shot_assist",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "goal_assist",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "through_ball",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "outswinging",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "inswinging",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "no_touch",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "cut_back",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "miscommunication",
"type": "BOOLEAN"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "technique",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "straight",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "out",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "backheel",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "pass",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "team",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "possession_team",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "possession",
"type": "INTEGER"
},
{
"mode": "NULLABLE",
"name": "second",
"type": "INTEGER"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "play_pattern",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "minute",
"type": "INTEGER"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "type",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "timestamp",
"type": "TIME"
},
{
"mode": "NULLABLE",
"name": "period",
"type": "INTEGER"
},
{
"mode": "REPEATED",
"name": "related_events",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "off_camera",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "out",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "counterpress",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "duration",
"type": "FLOAT"
},
{
"mode": "NULLABLE",
"name": "index",
"type": "INTEGER"
},
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "jersey_number",
"type": "INTEGER"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "position",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "player",
"type": "RECORD"
}
],
"mode": "REPEATED",
"name": "lineup",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "formation",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "tactics",
"type": "RECORD"
},
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "outcome",
"type": "RECORD"
}
],
"mode": "NULLABLE",
"name": "ball_receipt",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "under_pressure",
"type": "BOOLEAN"
},
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "body_part",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "head",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "left_foot",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "right_foot",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "out",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "aerial_won",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "other",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "clearance",
"type": "RECORD"
},
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "type",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "outcome",
"type": "RECORD"
}
],
"mode": "NULLABLE",
"name": "duel",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "out",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "aerial_won",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "miscontrol",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "out",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "recovery_failure",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "offensive",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "ball_recovery",
"type": "RECORD"
},
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "outcome",
"type": "RECORD"
}
],
"mode": "NULLABLE",
"name": "interception",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "out",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "deflection",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "offensive",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "save_block",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "block",
"type": "RECORD"
},
{
"fields": [
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "position",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "player",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "teammate",
"type": "BOOLEAN"
},
{
"mode": "REPEATED",
"name": "location",
"type": "FLOAT"
}
],
"mode": "REPEATED",
"name": "freeze_frame",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "body_part",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "type",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "technique",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "key_pass_id",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "end_location",
"type": "FLOAT"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "outcome",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "statsbomb_xg",
"type": "FLOAT"
},
{
"mode": "NULLABLE",
"name": "open_goal",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "first_time",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "deflected",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "one_on_one",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "aerial_won",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "out",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "saved_off_target",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "follows_dribble",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "saved_to_post",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "redirect",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "kick_off",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "shot",
"type": "RECORD"
},
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "position",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "type",
"type": "RECORD"
},
{
"mode": "REPEATED",
"name": "end_location",
"type": "FLOAT"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "technique",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "outcome",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "body_part",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "out",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "punched_out",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "shot_saved_off_target",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "shot_saved_to_post",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "success_in_play",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "saved_to_post",
"type": "BOOLEAN"
}, {
"mode": "NULLABLE",
"name": "lost_out",
"type": "BOOLEAN"
}, {
"mode": "NULLABLE",
"name": "success_out",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "lost_in_play",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "penalty_saved_to_post",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "goalkeeper",
"type": "RECORD"
},
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "outcome",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "nutmeg",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "out",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "overrun",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "no_touch",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "dribble",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "advantage",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "defensive",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "offensive",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "penalty",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "foul_won",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "offensive",
"type": "BOOLEAN"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "type",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "advantage",
"type": "BOOLEAN"
},
{
"mode": "NULLABLE",
"name": "penalty",
"type": "BOOLEAN"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "card",
"type": "RECORD"
}
],
"mode": "NULLABLE",
"name": "foul_committed",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "late_video_start",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "half_start",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "early_video_end",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "half_end",
"type": "RECORD"
},
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "replacement",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "outcome",
"type": "RECORD"
}
],
"mode": "NULLABLE",
"name": "substitution",
"type": "RECORD"
},
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "card",
"type": "RECORD"
}
],
"mode": "NULLABLE",
"name": "bad_behaviour",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "in_chain",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "injury_stoppage",
"type": "RECORD"
},
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "id",
"type": "INTEGER"
}
],
"mode": "NULLABLE",
"name": "outcome",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "out",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "_50_50",
"type": "RECORD"
},
{
"fields": [
{
"mode": "NULLABLE",
"name": "permanent",
"type": "BOOLEAN"
}
],
"mode": "NULLABLE",
"name": "player_off",
"type": "RECORD"
},
{
"fields": [
{
"mode": "REPEATED",
"name": "end_location",
"type": "FLOAT"
}
],
"mode": "NULLABLE",
"name": "carry",
"type": "RECORD"
}
]
def _get_field_schema(field):
name = field['name']
field_type = field.get('type', 'STRING')
mode = field.get('mode', 'NULLABLE')
fields = field.get('fields', [])
if fields:
subschema = []
for f in fields:
fields_res = _get_field_schema(f)
subschema.append(fields_res)
else:
subschema = []
field_schema = bigquery.SchemaField(name=name,
field_type=field_type,
mode=mode,
fields=subschema
)
return field_schema
def parse_bq_json_schema(schema_variable):
schema = []
schema_variable
for field in schema_variable:
schema.append(_get_field_schema(field))
return schema
def if_tbl_exists(client, table_ref):
from google.cloud.exceptions import NotFound
try:
client.get_table(table_ref)
return True
except NotFound:
return False
To run this cell you need to create Service Account for your Google Cloud. I’m not going to explain how to do this.Go here. Or use google (or duck duck go if you care about privacy but then you won’t like GCP much).
Code below authenticate your notebook to access BQ. You can query, remove tables, datasets, add new rows. Lots of stuff. We just create dataset (first check if one exists - rows 8 to 13). Define table name (row 15) and options of your import (rows 18 and 19)
# Construct a BigQuery client object.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.normpath("sa.json")
client = bigquery.Client()
dataset_id = "{}.statsbomb".format(client.project)
dataset = bigquery.Dataset(dataset_id)
dataset.location = "US"
try:
dataset = client.get_dataset(dataset)
print(" {}.{} exists".format(client.project, dataset.dataset_id))
except:
dataset = client.create_dataset(dataset)
print("Created dataset {}.{}".format(client.project, dataset.dataset_id))
table_id = "free_data" #your table name
table = dataset.table(table_id)
job_config = bigquery.LoadJobConfig()
job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
job_config.schema = parse_bq_json_schema(SCHEMA)
deductive-motif-114121.statsbomb exists
Here we getting list of all competitions.
competition_url = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/competitions.json"
r = requests.get(competition_url)
competitions = r.json()
Here we getting all free matches.
all_matches = []
for i in competitions:
url = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/matches/{comp_id}/{season_id}.json".format(
comp_id=i['competition_id'],
season_id=i['season_id'])
r = requests.get(url)
matches = r.json()
for j in matches:
all_matches.append(j)
This list stores all processed match ids. Just in case (schema may change, then we need to make update, so it’s good to know what broke the upload).
processed_events = []
All the magic happens here. We loop over all matches, get the data, assign match_id and match_date (not avaible in api call). Change the name of one of the colums (in BQ column name can not begins with number - row 11,12).
Rows 14, 15 and 16, are weirdly looking but BQ accepts only new line delimited json. More info. Then string is converted to bytes and we can upload it as file.
for i in all_matches:
if i['match_id'] not in processed_events:
match_events = []
url = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/events/{match_id}.json".format(
match_id=i['match_id'])
r = requests.get(url)
events = r.json()
for event in events:
event['match_id'] = i['match_id']
event['match_date'] = i['match_date']
event["competition_id"] = i['competition']['competition_id']
event["country_name"] = i['competition']['country_name']
event["competition_name"] = i['competition']['competition_name']
event["season_id"] = i['season']['season_id']
event["season_name"] = i['season']['season_name']
for key in list(event):
event["_50_50"] = event.pop(key) if key == "50_50" else None
match_events.append(event)
result = [json.dumps(record) for record in match_events]
result = '\n'.join(result)
data_as_file = StringIO(result)
job = client.load_table_from_file(data_as_file, table, job_config = job_config)
job.result() # Waits for table load to complete.
processed_events.append(i['match_id'])
But why to even bother. To easily connect BQ to dashboarding tool. Below example of how to create view that can be connected to data studio.
viewSQL = """
SELECT player.name,count(distinct id) quality_shots, COUNT(DISTINCT IF(shot.outcome.name="Goal", id,null)) goals_from_quality_shots FROM `deductive-motif-114121.statsbomb.free_data`
where competition_name = "FA Women's Super League" and season_name ="2019/2020" and shot.statsbomb_xg >0.2
group by 1
"""
table = dataset.table(view_name)
table.view_query = viewSQL
table.create()
Not pretty but easy to make (10 clicks) dashboard.