Interacting with AWS Glue

In [1]:
import boto3
In [2]:
# Display the installed boto3 version as the cell output.
boto3.__version__
Out[2]:
'1.9.103'
In [3]:
def get_databases(client=None):
    """
    Returns the names of all databases available in the Glue data catalog

    The Glue ``get_databases`` operation is paginated, so a single call may
    return only part of the catalog. A paginator is used to walk every page.

    :param client: optional boto3 Glue client; when omitted, the notebook-level
        ``glue_client`` is used
    :return: list of database names, in the order the service returns them
    """
    if client is None:
        # Looked up at call time, so the client may be created in a later cell
        # than the one defining this function.
        client = glue_client
    paginator = client.get_paginator(operation_name="get_databases")
    return [
        database["Name"]
        for page in paginator.paginate()
        for database in page["DatabaseList"]
    ]
In [4]:
def get_tables_for_database(database, client=None):
    """
    Returns a list of tables in a Glue database catalog

    :param database: name of the Glue database
    :param client: optional boto3 Glue client; when omitted, the notebook-level
        ``glue_client`` is used
    :return: list of dicts, one per table, each of the form
        ``{"name": <table name>}``
    """
    if client is None:
        # Looked up at call time, so the client may be created in a later cell
        # than the one defining this function.
        client = glue_client
    paginator = client.get_paginator(operation_name="get_tables")
    # The paginator follows NextToken on its own, so one pass over the pages
    # is enough; no manual token bookkeeping or retry loop is needed.
    pages = paginator.paginate(
        DatabaseName=database,
        PaginationConfig={"PageSize": 100},
    )
    return [
        {"name": table["Name"]}
        for page in pages
        for table in page["TableList"]
    ]

Set up the Glue client with boto3:

In [5]:
# Glue client used by the helper functions and the cells in this notebook.
glue_client = boto3.client(service_name='glue', region_name='eu-west-1')

Create two tables in the default database:

In [6]:
# Minimal create_table request: the target database and the table name only.
params = dict(DatabaseName='default', TableInput=dict(Name='table_one'))
glue_client.create_table(**params)

# Reuse the same request for the second table, changing only its name.
# This call is the cell's last expression, so its response is the cell output.
params['TableInput']['Name'] = 'table_two'
glue_client.create_table(**params)
Out[6]:
{'ResponseMetadata': {'RequestId': 'acd584c0-5536-11e9-9615-03d279e216a7',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Tue, 02 Apr 2019 11:01:32 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '2',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'acd584c0-5536-11e9-9615-03d279e216a7'},
  'RetryAttempts': 0}}

List the tables in every database whose name contains the string `default`:

In [7]:
# Visit only the databases whose name contains 'default' and print their tables.
for database in filter(lambda name: 'default' in name, get_databases()):
    print(f"Database: {database}")
    for table in get_tables_for_database(database):
        print(f"Table:    {table['name']}")
Database: default
Table:    table_one
Table:    table_two

Clean-up:

In [8]:
# Delete the first demo table from the default database.
params = dict(DatabaseName='default', Name='table_one')
glue_client.delete_table(**params)

# Same request, pointed at the second demo table.
# This call is the cell's last expression, so its response is the cell output.
params['Name'] = 'table_two'
glue_client.delete_table(**params)
Out[8]:
{'ResponseMetadata': {'RequestId': 'ad5c689b-5536-11e9-b79a-7706853c390d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Tue, 02 Apr 2019 11:01:33 GMT',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '2',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'ad5c689b-5536-11e9-b79a-7706853c390d'},
  'RetryAttempts': 0}}

Verification:

In [9]:
# Repeat the listing after the clean-up to check which tables remain.
for database in filter(lambda name: 'default' in name, get_databases()):
    print(f"Database: {database}")
    for table in get_tables_for_database(database):
        print(f"Table:    {table['name']}")
Database: default