1.背景介绍
在AWS平台下,利用S3存储桶自动触发Lambda完成PDF缩略图的生成。使用AWS CDK 完成应用的全栈部署。
文中使用AWS海外帐号进行演示,并在最后对此示例在AWS中国区域的差异进行补充说明。也可直接下载文中提供的源代码在AWS海外、中国区进行自动化部署。
1.1AWS CDK
预置云应用程序是一个具有挑战性的过程,您需要执行手工操作、编写自定义脚本、维护模板或学习特定领域的语言。
AWS 云开发工具包 (AWS CDK) 是一种开源软件开发框架,可使用熟悉的编程语言(目前支持TypeScript、JavaScript、Python、C# 和 Java)对云应用程序资源进行建模和预置。AWS CDK 通过 AWS CloudFormation 以安全、可重复的方式预置AWS资源,使开发人员能够以更加友好的方式预置AWS应用。
1.2Ghostscript
Ghostscript是一套基于Adobe PostScript及可移植文档格式(PDF)页面描述语言的解释器,属于自由软件(采用GNU Affero GPL许可证)。本文在Lambda中使用Ghostscript完成PDF到缩略图的转换工作。
2.架构介绍
为PDF文件生成缩略图的流程:
用户上传PDF文件到S3存储桶,事件触发Lambda进行格式转换工作,并将缩略图保存到S3存储桶。
3.从零开始构建应用程序及其部署
示例使用Amazon Linux 2 AMI启动的EC2实例作为编程、部署环境。
3.1[可选]构建支持Ghostscript的Lambda运行环境所需的Layer包
下面会使用AWS Lambda Layer,将Ghostscript添加到Lambda的运行环境之中。
$ sudo su -
# yum install git docker -y
# systemctl start docker
# vi make
添加内容:
# Downloads, builds and installs Ghostscript 9.27 with /opt as the install
# prefix, so the result can be zipped into a Lambda layer.
# NOTE: every recipe line below must start with a TAB character.

# Run each recipe in one shell so that `cd` carries over to the following lines.
.ONESHELL:
# -e aborts a recipe at the first failing command instead of carrying on.
.SHELLFLAGS := -ec

.PHONY: init all gs

# Fetch the source tarball (skipped when it is already present).
init: ghostscript-9.27.tar.gz

# Goal used by `make init all`: build and install Ghostscript.
all: /opt/bin/gs

# Short alias for the installed binary.
gs: /opt/bin/gs

ghostscript-9.27.tar.gz:
	curl -OL https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs927/ghostscript-9.27.tar.gz

/opt/bin/gs: ghostscript-9.27.tar.gz
	tar -zxf $<
	cd ghostscript-9.27
	./configure \
		--without-luratech \
		--prefix=/opt/
	$(MAKE) all
	$(MAKE) install
开始构建Ghostscript的Lambda运行环境
# docker run -it --rm -w=/var/task/build -v $PWD:/var/task -v $PWD:/opt/ --entrypoint /usr/bin/make -t lambci/lambda-base-2:build -f ../make init all
# zip -r gs.zip bin share
构建中使用lambci/lambda-base-2:build为基础镜像,此镜像提供了AWS Lambda运行的沙盒本地环境,在此环境中可以build各种Lambda自定义运行环境。本文构建ghostscript运行环境,并输出gs.zip包,在后面的AWS CDK自动部署中,将使用此包发布AWS Lambda Layer,为Lambda提供Ghostscript运行环境。
3.2安装nodejs
# curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
# . ~/.nvm/nvm.sh
# nvm install node
# node --version
3.3安装 AWS-CDK
# npm install -g aws-cdk
# cdk --version
3.4构建AWS CDK应用程序
# mkdir thumbnail && cd thumbnail/
# cdk init --language javascript
# npm i @aws-cdk/aws-iam @aws-cdk/aws-lambda @aws-cdk/aws-lambda-event-sources @aws-cdk/aws-logs @aws-cdk/aws-s3 @aws-cdk/aws-s3-notifications @aws-cdk/core path
# mkdir tmp
# cp ../gs.zip ./tmp/
# mkdir pdf2jpg-lambda
# vi pdf2jpg-lambda/child-process-promise.js
/*global module, require, console, Promise */
'use strict';
const childProcess = require('child_process'),
spawnPromise = function (command, argsarray, envOptions) {
return new Promise((resolve, reject) => {
console.log('executing', command, argsarray.join(' '));
const childProc = childProcess.spawn(command, argsarray, envOptions || { env: process.env, cwd: process.cwd() }),
resultBuffers = [];
childProc.stdout.on('data', buffer => {
console.log(buffer.toString());
resultBuffers.push(buffer);
});
childProc.stderr.on('data', buffer => console.error(buffer.toString()));
childProc.on('exit', (code, signal) => {
console.log(`${command} completed with ${code}:${signal}`);
if (code || signal) {
reject(`${command} failed with ${code || signal}`);
} else {
resolve(Buffer.concat(resultBuffers).toString().trim());
}
});
});
};
module.exports = {
spawn: spawnPromise
};
# vi pdf2jpg-lambda/s3-util.js
/*global module, require, Promise, console */
const aws = require('aws-sdk'),
fs = require('fs'),
s3 = new aws.S3(),
downloadFileFromS3 = function (bucket, fileKey, filePath) {
'use strict';
console.log('downloading', bucket, fileKey, filePath);
return new Promise(function (resolve, reject) {
const file = fs.createWriteStream(filePath),
stream = s3.getObject({
Bucket: bucket,
Key: fileKey
}).createReadStream();
stream.on('error', reject);
file.on('error', reject);
file.on('finish', function () {
console.log('downloaded', bucket, fileKey);
resolve(filePath);
});
stream.pipe(file);
});
}, uploadFileToS3 = function (bucket, fileKey, filePath, contentType) {
'use strict';
console.log('uploading', bucket, fileKey, filePath);
return s3.upload({
Bucket: bucket,
Key: fileKey,
Body: fs.createReadStream(filePath),
ACL: 'private',
ContentType: contentType
}).promise();
};
module.exports = {
downloadFileFromS3: downloadFileFromS3,
uploadFileToS3: uploadFileToS3
};
# vi pdf2jpg-lambda/index.js
const s3Util = require('./s3-util'),
childProcessPromise = require('./child-process-promise'),
path = require('path'),
os = require('os'),
EXTENSION = process.env.EXTENSION,
OUTPUT_BUCKET = process.env.OUTPUT_BUCKET,
MIME_TYPE = process.env.MIME_TYPE;
exports.handler = function (eventObject, context) {
console.log('eventObject', eventObject);;
const eventRecord = eventObject.Records && eventObject.Records[0],
inputBucket = eventRecord.s3.bucket.name,
key = eventRecord.s3.object.key,
id = context.awsRequestId,
resultKey = 'thumbnail/' + key.replace(/\.[^.]+$/, EXTENSION),
workdir = os.tmpdir(),
inputFile = path.join(workdir, id + path.extname(key)),
outputFile = path.join(workdir, 'converted-' + id + EXTENSION);
console.log('converting', inputBucket, key, 'using', inputFile);
return s3Util.downloadFileFromS3(inputBucket, key, inputFile)
.then(() => childProcessPromise.spawn(
'/opt/bin/gs',
['-sDEVICE=jpeggray','-dSAFER','-dBATCH', '-dNOPAUSE','-sPageList=1','-o', outputFile, inputFile],
{env: process.env, cwd: workdir}
))
.then(() => s3Util.uploadFileToS3(OUTPUT_BUCKET, resultKey, outputFile, MIME_TYPE));
};
Ghostscript 常用参数说明, https://www.ghostscript.com/doc/current/Use.htm:
DEVICE |
Ghostscript 输出的格式 |
SAFER |
启用对文件的访问控制 |
BATCH |
执行结束后退出 |
NOPAUSE |
禁用提示和暂停 |
PageList |
指定要转换的页码(本例中仅转换第1页) |
#vi lib/thumbnail-stack.js
const cdk = require('@aws-cdk/core');
const s3 = require('@aws-cdk/aws-s3');
const lambda = require('@aws-cdk/aws-lambda');
const iam = require('@aws-cdk/aws-iam')
const path = require('path');
const s3n = require('@aws-cdk/aws-s3-notifications');
/**
 * CDK stack for the PDF thumbnail pipeline: one S3 bucket, a Lambda layer
 * carrying Ghostscript, and a Lambda function invoked when a `.pdf` object
 * is created in the bucket.
 */
class ThumbnailStack extends cdk.Stack {
  /**
   *
   * @param {cdk.Construct} scope
   * @param {string} id
   * @param {cdk.StackProps=} props
   */
  constructor(scope, id, props) {
    super(scope, id, props);

    // Bucket that stores both the uploaded PDFs and the generated thumbnails
    const imgBucket = new s3.Bucket(this, 'img-bucket', {
    });

    // Ghostscript environment in a Lambda layer
    const layer = new lambda.LayerVersion(this, 'ghostscript-layer', {
      code: lambda.Code.fromAsset(path.join(__dirname, '../tmp/gs.zip')),
      compatibleRuntimes: [lambda.Runtime.NODEJS_10_X],
      // Ghostscript is distributed under the GNU Affero GPL, not Apache-2.0
      license: 'AGPL-3.0',
      description: 'A layer to host ghostscript',
    });

    // Lambda IAM role
    const pdf2jpgLambdaRole = new iam.Role(this, 'pdf2jpg-lambda-Role', {
      assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com')
    });

    // Lambda IAM role policy: object access in the bucket, plus CloudWatch Logs
    pdf2jpgLambdaRole.addToPolicy(new iam.PolicyStatement({
      resources: [imgBucket.bucketArn + '/*'],
      actions: ['s3:PutObject', 's3:GetObject']
    }));
    pdf2jpgLambdaRole.addToPolicy(new iam.PolicyStatement({
      resources: ['*'],
      actions: ['logs:CreateLogGroup', 'logs:CreateLogStream', 'logs:PutLogEvents']
    }));

    // Create the Lambda function. The asset path is resolved from this file,
    // like the layer above, so synthesis does not depend on the working directory.
    const pdf2jpgLambda = new lambda.Function(this, 'pdf2jpg-lambda', {
      code: lambda.Code.fromAsset(path.join(__dirname, '../pdf2jpg-lambda')),
      handler: 'index.handler',
      runtime: lambda.Runtime.NODEJS_10_X,
      layers: [layer],
      timeout: cdk.Duration.minutes(5),
      role: pdf2jpgLambdaRole,
      memorySize: 512,
      environment: {
        EXTENSION: '.jpg',
        MIME_TYPE: 'image/jpeg',
        OUTPUT_BUCKET: imgBucket.bucketName
      }
    });

    // Invoke the function for every created object whose key ends in '.pdf'
    imgBucket.addEventNotification(s3.EventType.OBJECT_CREATED, new s3n.LambdaDestination(pdf2jpgLambda), { suffix: '.pdf' });
  }
}

module.exports = { ThumbnailStack }
- 设置AWS CDK 运行时需要的ACCESS KEY 和SECRET KEY等信息。
# aws configure
# cdk bootstrap
# cdk deploy
3.5验证
在CloudFormation中,可以看到所有创建的资源:
在AWS CDK创建的S3存储桶中上传pdf,自动生成缩略图
3.6删除所有实验资源
# cdk destroy
注意:AWS CDK创建的S3存储桶默认的删除策略为保留(Retain),执行cdk destroy后该存储桶不会被删除,需要手动清空并删除。
4.自动化完成全栈部署
[AWS海外]使用Amazon Linux 2 AMI启动的EC2实例作为部署环境,下载并解压附件
$ sudo su -
# unzip thumbnail_ww.zip
# bash ./installEnv.sh
# source ~/.bash_profile
# npm install && cdk bootstrap && cdk deploy
5.AWS中国区部署
[AWS中国区]使用Amazon Linux 2 AMI启动的EC2实例作为部署环境,下载并解压附件
$ sudo su -
# unzip thumbnail_cn.zip
# bash ./installEnv.sh
# source ~/.bash_profile
# npm install && cdk bootstrap && cdk deploy
截至当前(2019年10月),AWS 中国区Lambda 暂不支持环境变量,部署之后需修改已部署Lambda中关于环境变量的代码(index.js中通过process.env读取EXTENSION、OUTPUT_BUCKET、MIME_TYPE的三行):
修改为:
// Hard-coded replacements for the three process.env lookups in index.js.
// EXTENSION keeps its leading dot: index.js substitutes it for the whole
// '.pdf' suffix of the key, so 'jpg' alone would produce keys like 'reportjpg'.
EXTENSION = '.jpg',
OUTPUT_BUCKET = 's3存储桶名(在cloudformation的资源中获得)',
MIME_TYPE = 'image/jpeg';
5.1中国区部署差异说明
- 在installEnv.sh 中,为npm, Dockerhub 设置中国加速,
npm config set registry http://r.cnpmjs.org
cat << EOF > /etc/docker/daemon.json
{
"registry-mirrors": ["http://registry.docker-cn.com"]
}
EOF
- 在lib/thumbnail-stack.js中去除Lambda暂不支持的环境变量
本篇作者