Multipart Upload of an Object into a Bucket
Multipart upload allows you to upload an object in multiple parts. Each part is a separate piece of the object that you can upload independently and in any order. If uploading a part fails, you can re-upload that part without affecting the other parts. Once all parts have been uploaded, the object is created.
The multipart upload process is as follows (in the code samples below, these steps are handled by the AWS S3 SDK; a minimal Python sketch of the same flow appears right after this list):
1. Send a request to start the multipart upload
2. Upload the parts of the file
3. Send a request to complete the upload (at this stage, all uploaded parts are assembled into the final object and the upload ends)
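The sketch below illustrates these three steps with boto3's low-level client. The endpoint URL, credentials, bucket name, object key, and file path are placeholder values, and the 400 MB part size simply mirrors the samples further down.

import boto3

s3 = boto3.client(
    's3',
    endpoint_url='endpoint_url',
    aws_access_key_id='access_key',
    aws_secret_access_key='secret_key',
)

bucket, key, file_path = 'sample_bucket', 'file.png', 'files/file.png'  # placeholders
part_size = 400 * 1024 * 1024  # 400 MB per part

# 1. Start the multipart upload.
upload = s3.create_multipart_upload(Bucket=bucket, Key=key)
upload_id = upload['UploadId']

parts = []
try:
    # 2. Upload the file part by part, remembering each part's ETag.
    with open(file_path, 'rb') as f:
        part_number = 1
        while True:
            data = f.read(part_size)
            if not data:
                break
            response = s3.upload_part(
                Bucket=bucket, Key=key, UploadId=upload_id,
                PartNumber=part_number, Body=data,
            )
            parts.append({'ETag': response['ETag'], 'PartNumber': part_number})
            part_number += 1

    # 3. Complete the upload; the parts are assembled into the final object.
    s3.complete_multipart_upload(
        Bucket=bucket, Key=key, UploadId=upload_id,
        MultipartUpload={'Parts': parts},
    )
except Exception:
    # Abort on failure so the incomplete parts stop consuming storage.
    s3.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
    raise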
Important note: If you decide to abandon the upload partway through, you must abort the multipart upload so that the storage consumed by the already-uploaded, incomplete parts is released.
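If an interrupted upload was never aborted, its parts stay in the bucket and keep occupying space. The following is a minimal sketch, assuming the same placeholder boto3 client as above, that lists the pending multipart uploads in a bucket and aborts each one:

# List incomplete multipart uploads and abort them to free the storage
# held by their already-uploaded parts ('sample_bucket' is a placeholder).
pending = s3.list_multipart_uploads(Bucket='sample_bucket')
for upload in pending.get('Uploads', []):
    s3.abort_multipart_upload(
        Bucket='sample_bucket',
        Key=upload['Key'],
        UploadId=upload['UploadId'],
    )
    print(f"Aborted upload {upload['UploadId']} of {upload['Key']}")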
Components
- Bucket Name
- Object Name
- File Path
Bucket Name
The name of the destination bucket
Object Name (Key)
The name of the object after it is uploaded to the bucket
File Path (SourceFile)
The path of the file to be uploaded
- .NET
- PHP
- Python
- Javascript
- GO
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
namespace UploadFileMPULowLevelAPIExample
{
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using Amazon.Runtime;
using Amazon.S3;
using Amazon.S3.Model;
/// <summary>
/// Uses the Amazon Simple Storage Service (Amazon S3) low-level API to
/// upload an object from the local system to an Amazon S3 bucket. This
/// example was created using the AWS SDK for .NET version 3.7 and
/// .NET Core 5.0.
/// </summary>
public class UploadFileMPULowLevelAPI
{
private static IAmazonS3 _s3Client;
private const string BUCKET_NAME = "<BUCKET_NAME>";
private static string LOCAL_PATH = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);
private const string OBJECT_NAME = "<OBJECT_NAME>";
public static async Task Main()
{
string bucketName = BUCKET_NAME;
string keyName = OBJECT_NAME;
string filePath = $"{LOCAL_PATH}/{keyName}";
var awsCredentials = new Amazon.Runtime.BasicAWSCredentials("<ACCESS-KEY>", "<SECRET-KEY>");
var config = new AmazonS3Config { ServiceURL = "<ENDPOINT>" };
_s3Client = new AmazonS3Client(awsCredentials, config);
Console.WriteLine("Uploading an object...");
await UploadObjectAsync(_s3Client, bucketName, keyName, filePath);
}
/// <summary>
/// Uses the low-level API to upload an object from the local system
/// to an S3 bucket.
/// </summary>
/// <param name="client">The initialized S3 client object used to
/// perform the multi-part upload.</param>
/// <param name="bucketName">>The name of the bucket to which to upload
/// the file.</param>
/// <param name="keyName">The file name to be used in the
/// destination S3 bucket.</param>
/// <param name="filePath">The path, including the file name of the
/// file to be uploaded to the S3 bucket.</param>
public static async Task UploadObjectAsync(
IAmazonS3 client,
string bucketName,
string keyName,
string filePath)
{
// Create list to store upload part responses.
List<UploadPartResponse> uploadResponses = new ();
// Setup information required to initiate the multipart upload.
InitiateMultipartUploadRequest initiateRequest = new ()
{
BucketName = bucketName,
Key = keyName,
};
// Initiate the upload.
InitiateMultipartUploadResponse initResponse =
await client.InitiateMultipartUploadAsync(initiateRequest);
// Upload parts.
long contentLength = new FileInfo(filePath).Length;
long partSize = 400 * (long)Math.Pow(2, 20); // 400 MB
try
{
Console.WriteLine("Uploading parts");
long filePosition = 0;
for (int i = 1; filePosition < contentLength; i++)
{
UploadPartRequest uploadRequest = new ()
{
BucketName = bucketName,
Key = keyName,
UploadId = initResponse.UploadId,
PartNumber = i,
PartSize = partSize,
FilePosition = filePosition,
FilePath = filePath,
};
// Track upload progress.
uploadRequest.StreamTransferProgress +=
new EventHandler<StreamTransferProgressArgs>(UploadPartProgressEventCallback);
// Upload a part and add the response to our list.
uploadResponses.Add(await client.UploadPartAsync(uploadRequest));
filePosition += partSize;
}
// Setup to complete the upload.
CompleteMultipartUploadRequest completeRequest = new ()
{
BucketName = bucketName,
Key = keyName,
UploadId = initResponse.UploadId,
};
completeRequest.AddPartETags(uploadResponses);
// Complete the upload.
CompleteMultipartUploadResponse completeUploadResponse =
await client.CompleteMultipartUploadAsync(completeRequest);
Console.WriteLine($"Object {keyName} added to {bucketName} bucket");
}
catch (Exception exception)
{
Console.WriteLine($"An AmazonS3Exception was thrown: {exception.Message}");
// Abort the upload.
AbortMultipartUploadRequest abortMPURequest = new ()
{
BucketName = bucketName,
Key = keyName,
UploadId = initResponse.UploadId,
};
await client.AbortMultipartUploadAsync(abortMPURequest);
}
}
/// <summary>
/// Handles the UploadProgress event to display the progress of the
/// S3 multi-part upload.
/// </summary>
/// <param name="sender">The object that raised the event.</param>
/// <param name="e">The event parameters.</param>
public static void UploadPartProgressEventCallback(object sender, StreamTransferProgressArgs e)
{
Console.WriteLine($"{e.TransferredBytes}/{e.TotalBytes}");
}
}
}
require 'vendor/autoload.php';
use Aws\S3\S3Client;
use Aws\Exception\AwsException;
use Aws\S3\MultipartUploader;
use Aws\Exception\MultipartUploadException;
// Instantiate the S3 client; the endpoint and credentials below are placeholders.
$s3Client = new S3Client([
    'version' => 'latest',
    'region' => 'default',
    'endpoint' => 'endpoint_url',
    'credentials' => [
        'key' => 'access_key',
        'secret' => 'secret_key',
    ],
]);
$source = '/path/to/large/file.zip';
$uploader = new MultipartUploader($s3Client, $source, [
'bucket' => 'your-bucket',
'key' => 'my-file.zip',
]);
try {
$result = $uploader->upload();
echo "Upload complete: {$result['ObjectURL']}\n";
} catch (MultipartUploadException $e) {
echo $e->getMessage() . "\n";
}
import logging
import os
import sys
import threading
from typing import Optional
import boto3
from boto3.s3.transfer import TransferConfig
from botocore.exceptions import ClientError
# Constant variables
KB = 1024
MB = KB * KB
GB = MB * KB
# Configure logging
logging.basicConfig(level=logging.INFO)
# S3 client instance
s3_client = boto3.client(
's3',
endpoint_url='endpoint_url',
aws_access_key_id='access_key',
aws_secret_access_key='secret_key'
)
class ProgressPercentage:
def __init__(self, file_path: str):
self._file_path = file_path
self._size = float(os.path.getsize(file_path))
self._seen_so_far = 0
self._lock = threading.Lock()
def __call__(self, bytes_amount):
"""
To simplify, assume this is hooked up to a single file_path
:param bytes_amount: uploaded bytes
"""
with self._lock:
self._seen_so_far += bytes_amount
percentage = (self._seen_so_far / self._size) * 100
sys.stdout.write(
"\r%s %s / %s (%.2f%%)" % (self._file_path, self._seen_so_far, self._size, percentage)
)
sys.stdout.flush()
def upload_file(file_path: str, bucket: str, object_name: Optional[str] = None):
"""
Upload a file to an S3 bucket
:param file_path: File to upload
:param bucket: Bucket to upload to
:param object_name: S3 object name. If not specified then file_path is used
:return: True if file was uploaded, else False
"""
# If S3 object_name was not specified, use file_path
if object_name is None:
object_name = file_path
# Upload the file
try:
# Set the desired multipart threshold value (400 MB)
config = TransferConfig(multipart_threshold=400 * MB, max_concurrency=5)
s3_client.upload_file(
file_path,
bucket,
object_name,
ExtraArgs={'ACL': 'public-read'},
Callback=ProgressPercentage(file_path),
Config=config
)
except ClientError as e:
logging.error(e)
return False
return True
# Upload a sample file; assumes a 'files' directory next to this script.
base_directory = os.path.dirname(os.path.abspath(__file__))
object_name = 'file.png'
file_rel_path: str = os.path.join('files', object_name)
file_abs_path: str = os.path.join(base_directory, file_rel_path)
upload_file(file_abs_path, 'sample_bucket', object_name)
const {
S3Client,
CompleteMultipartUploadCommand,
CreateMultipartUploadCommand,
UploadPartCommand,
} = require("@aws-sdk/client-s3");
const path = require("path");
const fs = require("fs");
// Create an S3 client service object
const s3 = new S3Client({
region: "default",
endpoint: "endpoint_url",
credentials: {
accessKeyId: "access_key",
secretAccessKey: "secret_key",
},
});
// File
var fileName = "file_name";
var filePath = "./" + fileName;
var fileKey = fileName;
var buffer = fs.readFileSync(filePath);
// S3 Upload options
var bucket = "bucket_name";
// Upload
var startTime = new Date();
var partNum = 0;
var partSize = 1024 * 1024 * 5; // Minimum 5MB per chunk (except the last part) http://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadComplete.html
var numPartsLeft = Math.ceil(buffer.length / partSize);
var maxUploadTries = 3;
var multiPartParams = {
Bucket: bucket,
Key: fileKey,
};
var multipartMap = {
Parts: [],
};
async function completeMultipartUpload(s3, doneParams) {
console.log(doneParams);
const completeMultipartUploadResponse = await s3.send(
new CompleteMultipartUploadCommand(doneParams)
);
var delta = (new Date() - startTime) / 1000;
console.log("Completed upload in", delta, "seconds");
console.log("Final upload data:", completeMultipartUploadResponse);
}
async function uploadPart(s3, multipart, partParams, tryNum) {
var tryNum = tryNum || 1;
const uploadPartResponse = await s3.send(new UploadPartCommand(partParams));
console.log(partParams);
multipartMap.Parts[partParams.PartNumber - 1] = {
ETag: uploadPartResponse.ETag,
PartNumber: Number(partParams.PartNumber),
};
console.log("Completed part", partParams.PartNumber);
console.log("mData", uploadPartResponse);
if (--numPartsLeft > 0) return; // complete only when all parts uploaded
var doneParams = {
Bucket: bucket,
Key: fileKey,
MultipartUpload: multipartMap,
UploadId: multipart.UploadId,
};
console.log("Completing upload...");
completeMultipartUpload(s3, doneParams);
}
// Multipart
console.log("Creating multipart upload for:", fileKey);
s3.send(new CreateMultipartUploadCommand(multiPartParams)).then((createMultipartUploadResponse) => {
for (var rangeStart = 0; rangeStart < buffer.length; rangeStart += partSize) {
partNum++;
var end = Math.min(rangeStart + partSize, buffer.length),
partParams = {
Body: buffer.slice(rangeStart, end),
Bucket: bucket,
Key: fileKey,
PartNumber: partNum,
UploadId: createMultipartUploadResponse.UploadId,
};
// Send a single part
console.log(
"Uploading part: #",
partParams.PartNumber,
", Range start:",
rangeStart
);
uploadPart(s3, createMultipartUploadResponse, partParams);
}
});
package main
import (
"bytes"
"fmt"
"net/http"
"os"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/awserr"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
)
const (
maxPartSize = int64(400 * 1024 * 1024)
maxRetries = 3
awsAccessKeyID = "<ACCESS_KEY>"
awsSecretAccessKey = "<SECRET_KEY>"
awsBucketRegion = "default"
awsBucketEndpoint = "<ENDPOINT_URL>"
awsBucketName = "<BUCKET_NAME>"
)
func main() {
creds := credentials.NewStaticCredentials(awsAccessKeyID, awsSecretAccessKey, "")
_, err := creds.Get()
if err != nil {
fmt.Printf("bad credentials: %s", err)
}
cfg := aws.NewConfig().WithRegion(awsBucketRegion).WithCredentials(creds).WithEndpoint(awsBucketEndpoint)
svc := s3.New(session.New(), cfg)
file, err := os.Open("<FILE_NAME>")
if err != nil {
fmt.Printf("err opening file: %s", err)
return
}
defer file.Close()
fileInfo, _ := file.Stat()
size := fileInfo.Size()
buffer := make([]byte, size)
file.Read(buffer)
fileType := http.DetectContentType(buffer)
path := "/media/" + file.Name()
input := &s3.CreateMultipartUploadInput{
Bucket: aws.String(awsBucketName),
Key: aws.String(path),
ContentType: aws.String(fileType),
}
resp, err := svc.CreateMultipartUpload(input)
if err != nil {
fmt.Println(err.Error())
return
}
fmt.Println("Created multipart upload request")
var curr, partLength int64
var remaining = size
var completedParts []*s3.CompletedPart
partNumber := 1
for curr = 0; remaining != 0; curr += partLength {
if remaining < maxPartSize {
partLength = remaining
} else {
partLength = maxPartSize
}
completedPart, err := uploadPart(svc, resp, buffer[curr:curr+partLength], partNumber)
if err != nil {
fmt.Println(err.Error())
err := abortMultipartUpload(svc, resp)
if err != nil {
fmt.Println(err.Error())
}
return
}
remaining -= partLength
partNumber++
completedParts = append(completedParts, completedPart)
}
completeResponse, err := completeMultipartUpload(svc, resp, completedParts)
if err != nil {
fmt.Println(err.Error())
return
}
fmt.Printf("Successfully uploaded file: %s\n", completeResponse.String())
}
func completeMultipartUpload(svc *s3.S3, resp *s3.CreateMultipartUploadOutput, completedParts []*s3.CompletedPart) (*s3.CompleteMultipartUploadOutput, error) {
completeInput := &s3.CompleteMultipartUploadInput{
Bucket: resp.Bucket,
Key: resp.Key,
UploadId: resp.UploadId,
MultipartUpload: &s3.CompletedMultipartUpload{
Parts: completedParts,
},
}
return svc.CompleteMultipartUpload(completeInput)
}
func uploadPart(svc *s3.S3, resp *s3.CreateMultipartUploadOutput, fileBytes []byte, partNumber int) (*s3.CompletedPart, error) {
tryNum := 1
partInput := &s3.UploadPartInput{
Body: bytes.NewReader(fileBytes),
Bucket: resp.Bucket,
Key: resp.Key,
PartNumber: aws.Int64(int64(partNumber)),
UploadId: resp.UploadId,
ContentLength: aws.Int64(int64(len(fileBytes))),
}
for tryNum <= maxRetries {
uploadResult, err := svc.UploadPart(partInput)
if err != nil {
if tryNum == maxRetries {
if aerr, ok := err.(awserr.Error); ok {
return nil, aerr
}
return nil, err
}
fmt.Printf("Retrying to upload part #%v\n", partNumber)
tryNum++
} else {
fmt.Printf("Uploaded part #%v\n", partNumber)
return &s3.CompletedPart{
ETag: uploadResult.ETag,
PartNumber: aws.Int64(int64(partNumber)),
}, nil
}
}
return nil, nil
}
func abortMultipartUpload(svc *s3.S3, resp *s3.CreateMultipartUploadOutput) error {
fmt.Println("Aborting multipart upload for UploadId#" + *resp.UploadId)
abortInput := &s3.AbortMultipartUploadInput{
Bucket: resp.Bucket,
Key: resp.Key,
UploadId: resp.UploadId,
}
_, err := svc.AbortMultipartUpload(abortInput)
return err
}
The following command can be used to run the Go example above, assuming the source file is named s3_multipart_upload.go:
go run s3_multipart_upload.go