Write a Pandas dataframe to CSV on S3

Write a pandas DataFrame to a single CSV file on S3. Note that the example uses `|` rather than a comma as the field separator.

import boto3
from io import StringIO

# Name of the S3 bucket that the CSV object is written to.
DESTINATION = 'my-bucket'

def _write_dataframe_to_csv_on_s3(dataframe, filename, sep="|", bucket=None):
    """Serialize a dataframe to delimited text and upload it as one S3 object.

    Args:
        dataframe: pandas DataFrame to serialize; the index is not written.
        filename: S3 object key to write. It is used verbatim, so pass the
            full key (e.g. ``"my-folder/data.csv"``) rather than a folder.
        sep: Field delimiter handed to ``DataFrame.to_csv``. Defaults to
            ``"|"``, the separator this function has always used.
        bucket: Name of the target bucket. When omitted, the module-level
            ``DESTINATION`` is used (looked up at call time).
    """
    print("Writing {} records to {}".format(len(dataframe), filename))
    target_bucket = DESTINATION if bucket is None else bucket
    # Called without a path, to_csv returns the delimited text as a string,
    # so no intermediate buffer object is needed.
    csv_text = dataframe.to_csv(sep=sep, index=False)
    s3_resource = boto3.resource("s3")
    # Encode explicitly so the uploaded bytes are always UTF-8 instead of
    # relying on how the client coerces a str body.
    s3_resource.Object(target_bucket, filename).put(Body=csv_text.encode("utf-8"))

# Example call; assumes `my_df` is a DataFrame defined elsewhere. The second
# argument is used as the S3 object key as-is.
_write_dataframe_to_csv_on_s3(my_df, 'my-folder')

Gist

Write a Pandas dataframe to Parquet on S3

Write a pandas dataframe to a single Parquet file on S3.

# Note: `s3fs` must be installed for pandas to be able to write to S3.
#       AWS credentials are expected in the usual location, ~/.aws/credentials.
# Name of the S3 bucket; it is interpolated into the s3:// output path below.
DESTINATION = 'my-bucket'

def _write_dataframe_to_parquet_on_s3(dataframe, filename):
    """Store a dataframe as ``data.parquet`` beneath an S3 key prefix.

    Note that ``filename`` is used as the folder part of the path: the output
    goes to ``s3://<DESTINATION>/<filename>/data.parquet``.
    """
    record_count = len(dataframe)
    print(f"Writing {record_count} records to {filename}")
    # Assemble the full S3 URL and hand it to pandas, which does the write.
    target_url = "s3://{}/{}/data.parquet".format(DESTINATION, filename)
    dataframe.to_parquet(target_url)

# Example call; assumes `my_df` is a DataFrame defined elsewhere. This targets
# s3://my-bucket/my-folder/data.parquet.
_write_dataframe_to_parquet_on_s3(my_df, 'my-folder')

Gist