added webpage-screenshot job processor to media worker

Jobs can now be created in the media queue named `webpage-screenshot`
that will grab a screenshot of a web page, upload the image to storage,
and update a document in the database with information about that image.

- renderSectionTitle now accesses options.withBorder
- added webpage-screenshot job processor to media worker
- added `puppeteer` and `user-agent` modules to package
master
rob 2 years ago
parent fedb677220
commit fc3e63d284

@ -1,5 +1,6 @@
mixin renderSectionTitle (title, barButton)
.dtp-border-bottom
mixin renderSectionTitle (title, barButton, options)
- options = Object.assign({ withBorder: true }, options);
div(class= options.withBorder ? 'dtp-border-bottom' : 'uk-margin-small')
div(uk-grid).uk-grid-small.uk-flex-middle
.uk-width-expand
h4.uk-margin-small= title

@ -58,6 +58,8 @@ class MediaWorker extends SiteWorker {
await this.loadProcessor(path.join(__dirname, 'media', 'job', 'attachment-ingest.js'));
await this.loadProcessor(path.join(__dirname, 'media', 'job', 'attachment-delete.js'));
await this.loadProcessor(path.join(__dirname, 'media', 'job', 'webpage-screenshot.js'));
await this.startProcessors();
}

@ -0,0 +1,134 @@
// media/job/webpage-screenshot.js
// Copyright (C) 2022 DTP Technologies, LLC
// License: Apache-2.0
'use strict';
const path = require('path');
const fs = require('fs');
const mongoose = require('mongoose');
const puppeteer = require('puppeteer');
const userAgent = require('user-agent');
const { SiteWorkerProcess } = require(path.join(__dirname, '..', '..', '..', '..', 'lib', 'site-lib'));
/**
 * Media-queue job processor that captures a screenshot of a web page with
 * Puppeteer, hands the image file to the image service, and records the
 * resulting image id on a database document.
 *
 * Job data read by the processor: `modelName` (name of a registered Mongoose
 * model), `documentId` (id of the document to update) and `pageUrl` (page to
 * capture).
 */
class WebpageScreenshotJob extends SiteWorkerProcess {

  static get COMPONENT ( ) {
    return {
      name: 'webpageScreenshotJob',
      slug: 'webpage-screenshot-job',
    };
  }

  constructor (worker) {
    super(worker, WebpageScreenshotJob.COMPONENT);
  }

  /**
   * Directory where screenshots are written before being uploaded.
   * Built from the DTP_IMAGE_WORK_PATH environment variable.
   * @returns {string} Path of the screenshot work directory.
   */
  getWorkDirectory ( ) {
    return path.join(process.env.DTP_IMAGE_WORK_PATH, 'webpage-screenshot');
  }

  /**
   * Creates the work directory, launches the Puppeteer browser, and registers
   * the `webpage-screenshot` processor on the `media` queue with a
   * concurrency of 1.
   */
  async start ( ) {
    await super.start();

    await fs.promises.mkdir(this.getWorkDirectory(), { recursive: true });

    this.log.info('starting Puppeteer browser engine');
    this.browser = await puppeteer.launch();

    this.queue = await this.getJobQueue('media');

    this.log.info('registering job processor', { queue: this.queue.name, name: 'webpage-screenshot' });
    this.queue.process('webpage-screenshot', 1, this.processWebpageScreenshot.bind(this));
  }

  /**
   * Closes the Puppeteer browser (if one was started) and stops the process.
   */
  async stop ( ) {
    if (this.browser) {
      this.log.info('stopping Puppeteer browser engine');
      // Await the close so shutdown does not proceed while the browser is
      // still exiting, and so a rejection surfaces here instead of being
      // reported as an unhandled rejection.
      await this.browser.close();
      delete this.browser;
    }
    await super.stop();
  }

  /**
   * Processes one `webpage-screenshot` job.
   *
   * @param {object} job Queue job; `job.data` supplies `modelName`,
   * `documentId` and `pageUrl`.
   * @throws {Error} When the model name is not registered, the document is
   * not found, or any capture/upload/update step fails. Failures are logged
   * and rethrown.
   */
  async processWebpageScreenshot (job) {
    const { image: imageService } = this.dtp.services;
    const { modelName, documentId, pageUrl } = job.data;

    // mongoose.model(name) throws for an unregistered name instead of
    // returning a falsy value, so the lookup is wrapped to raise the intended
    // error message in both cases.
    let model;
    try {
      model = mongoose.model(modelName);
    } catch (error) {
      throw new Error(`Invalid model name specified for document: ${modelName}`, { cause: error });
    }
    if (!model) {
      throw new Error(`Invalid model name specified for document: ${modelName}`);
    }

    const imageFilename = path.join(this.getWorkDirectory(), `${documentId}.jpg`);
    this.log.info('job received to capture webpage screenshot', { modelName, documentId, pageUrl });

    try {
      job.data.document = await model.findById(documentId);
      if (!job.data.document) {
        throw new Error(`document not found: ${modelName}:${documentId}`);
      }

      this.log.info('Opening web page', { modelName, documentId, pageUrl });
      job.page = await this.browser.newPage();
      if (!job.page) {
        // Context (modelName, documentId, pageUrl) is logged by the catch
        // block below; the Error constructor would discard it.
        throw new Error('failed to create new browser page for capturing screenshot');
      }

      // NOTE(review): confirm that the `user-agent` module's toString()
      // yields a real User-Agent header value; if the export is a plain
      // object this sends "[object Object]".
      await job.page.setUserAgent(userAgent.toString());
      await job.page.setViewport({
        width: 720,
        height: 600,
        deviceScaleFactor: 1.0,
      });
      await job.page.goto(pageUrl, { waitUntil: 'networkidle2' });

      this.jobLog(job, 'capturing screenshot to file');
      await job.page.screenshot({
        path: imageFilename,
        type: 'jpeg',
        quality: 85,
        fullPage: false,
      });

      this.jobLog(job, 'uploading screenshot to storage and database');
      const outFileStat = await fs.promises.stat(imageFilename);
      const imageDefinition = { };
      const imageFile = {
        path: imageFilename,
        mimetype: 'image/jpeg',
        size: outFileStat.size,
      };
      // The image is created on behalf of the author of the document loaded
      // above (presumably every supported model has an `author` field —
      // verify against the callers that enqueue this job).
      job.data.screenshotImage = await imageService.create(job.data.document.author, imageDefinition, imageFile);

      this.jobLog(job, 'updating document with screenshot image');
      await model.updateOne(
        { _id: documentId },
        {
          $set: {
            'attachments.screenshot': job.data.screenshotImage._id,
          },
        },
      );

      this.jobLog(job, 'screenshot captured and processed successfully');
    } catch (error) {
      this.log.error('failed to process webpage screenshot', { modelName, documentId, pageUrl, error });
      throw error;
    } finally {
      // Nested try/finally: the temp file is removed even if closing the
      // page rejects.
      try {
        if (job.page && !job.page.isClosed()) {
          this.log.info('closing browser page after capturing screenshot', { modelName, documentId, pageUrl });
          await job.page.close();
          delete job.page;
        }
      } finally {
        this.log.info('removing temp screenshot file', { imageFilename });
        await fs.promises.rm(imageFilename, { force: true });
      }
    }
  }
}
module.exports = WebpageScreenshotJob;

@ -65,6 +65,7 @@
"picmo": "^5.4.0",
"pretty-checkbox": "^3.0.3",
"pug": "^3.0.2",
"puppeteer": "^18.0.5",
"qrcode": "^1.5.0",
"rate-limiter-flexible": "^2.3.6",
"rotating-file-stream": "^3.0.3",
@ -83,6 +84,7 @@
"unzalgo": "^3.0.0",
"upload": "^1.3.1",
"url-validation": "^2.1.0",
"user-agent": "^1.0.4",
"uuid": "^8.3.2",
"zxcvbn": "^4.4.2"
},

@ -1088,6 +1088,13 @@
"@types/node" "*"
"@types/webidl-conversions" "*"
"@types/yauzl@^2.9.1":
version "2.10.0"
resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.10.0.tgz#b3248295276cf8c6f153ebe6a9aba0c988cb2599"
integrity sha512-Cn6WYCm0tXv8p6k+A8PvbDG763EDpBoTzHdA+Q/MF6H3sapGjCm9NzoaJncJS9tUKSuCoDs9XHxYYsQDgxR6kw==
dependencies:
"@types/node" "*"
"@webassemblyjs/ast@1.11.1":
version "1.11.1"
resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.11.1.tgz#2bfd767eae1a6996f432ff7e8d7fc75679c0b6a7"
@ -2046,7 +2053,7 @@ buffer-xor@^1.0.3:
resolved "https://registry.yarnpkg.com/buffer-xor/-/buffer-xor-1.0.3.tgz#26e61ed1422fb70dd42e6e36729ed51d855fe8d9"
integrity sha1-JuYe0UIvtw3ULm42cp7VHYVf6Nk=
buffer@^5.5.0, buffer@^5.6.0:
buffer@^5.2.1, buffer@^5.5.0, buffer@^5.6.0:
version "5.7.1"
resolved "https://registry.yarnpkg.com/buffer/-/buffer-5.7.1.tgz#ba62e7c13133053582197160851a8f648e99eed0"
integrity sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==
@ -2682,6 +2689,13 @@ cropperjs@^1.5.12:
resolved "https://registry.yarnpkg.com/cropperjs/-/cropperjs-1.5.12.tgz#d9c0db2bfb8c0d769d51739e8f916bbc44e10f50"
integrity sha512-re7UdjE5UnwdrovyhNzZ6gathI4Rs3KGCBSc8HCIjUo5hO42CtzyblmWLj6QWVw7huHyDMfpKxhiO2II77nhDw==
cross-fetch@3.1.5:
version "3.1.5"
resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.5.tgz#e1389f44d9e7ba767907f7af8454787952ab534f"
integrity sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw==
dependencies:
node-fetch "2.6.7"
cross-spawn@^7.0.3:
version "7.0.3"
resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
@ -2780,6 +2794,13 @@ debug@4.3.2:
dependencies:
ms "2.1.2"
debug@4.3.4, debug@^4.3.4:
version "4.3.4"
resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies:
ms "2.1.2"
debug@^3.2.6, debug@^3.2.7:
version "3.2.7"
resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a"
@ -2787,13 +2808,6 @@ debug@^3.2.6, debug@^3.2.7:
dependencies:
ms "^2.1.1"
debug@^4.3.4:
version "4.3.4"
resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies:
ms "2.1.2"
debug@~4.1.0:
version "4.1.1"
resolved "https://registry.yarnpkg.com/debug/-/debug-4.1.1.tgz#3b72260255109c6b589cee050f1d516139664791"
@ -2954,6 +2968,11 @@ dev-ip@^1.0.1:
resolved "https://registry.yarnpkg.com/dev-ip/-/dev-ip-1.0.1.tgz#a76a3ed1855be7a012bb8ac16cb80f3c00dc28f0"
integrity sha1-p2o+0YVb56ASu4rBbLgPPADcKPA=
devtools-protocol@0.0.1036444:
version "0.0.1036444"
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1036444.tgz#a570d3cdde61527c82f9b03919847b8ac7b1c2b9"
integrity sha512-0y4f/T8H9lsESV9kKP1HDUXgHxCdniFeJh6Erq+FbdOEvp/Ydp9t8kcAAM5gOd17pMrTDlFWntoHtzzeTUWKNw==
dicer@0.2.5:
version "0.2.5"
resolved "https://registry.yarnpkg.com/dicer/-/dicer-0.2.5.tgz#5996c086bb33218c812c090bddc09cd12facb70f"
@ -3598,6 +3617,17 @@ extglob@^2.0.4:
snapdragon "^0.8.1"
to-regex "^3.0.1"
extract-zip@2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-2.0.1.tgz#663dca56fe46df890d5f131ef4a06d22bb8ba13a"
integrity sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==
dependencies:
debug "^4.1.1"
get-stream "^5.1.0"
yauzl "^2.10.0"
optionalDependencies:
"@types/yauzl" "^2.9.1"
fancy-log@^1.3.2, fancy-log@^1.3.3:
version "1.3.3"
resolved "https://registry.yarnpkg.com/fancy-log/-/fancy-log-1.3.3.tgz#dbc19154f558690150a23953a0adbd035be45fc7"
@ -4404,6 +4434,14 @@ http-proxy@^1.18.1:
follow-redirects "^1.0.0"
requires-port "^1.0.0"
https-proxy-agent@5.0.1:
version "5.0.1"
resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz#c59ef224a04fe8b754f3db0063a25ea30d0005d6"
integrity sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==
dependencies:
agent-base "6"
debug "4"
https-proxy-agent@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.0.tgz#e2a90542abb68a762e0a0850f6c9edadfd8506b2"
@ -5892,6 +5930,13 @@ node-fetch@2:
dependencies:
whatwg-url "^5.0.0"
node-fetch@2.6.7:
version "2.6.7"
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.7.tgz#24de9fba827e3b4ae44dc8b20256a379160052ad"
integrity sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==
dependencies:
whatwg-url "^5.0.0"
node-gyp-build@^4.2.3:
version "4.3.0"
resolved "https://registry.yarnpkg.com/node-gyp-build/-/node-gyp-build-4.3.0.tgz#9f256b03e5826150be39c764bf51e993946d71a3"
@ -6560,6 +6605,11 @@ process-nextick-args@^2.0.0, process-nextick-args@~2.0.0:
resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2"
integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==
progress@2.0.3:
version "2.0.3"
resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==
promise@^7.0.1:
version "7.3.1"
resolved "https://registry.yarnpkg.com/promise/-/promise-7.3.1.tgz#064b72602b18f90f29192b8b1bc418ffd1ebd3bf"
@ -6575,6 +6625,11 @@ proxy-addr@~2.0.7:
forwarded "0.2.0"
ipaddr.js "1.9.1"
proxy-from-env@1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2"
integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==
prr@~1.0.1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/prr/-/prr-1.0.1.tgz#d3fc114ba06995a45ec6893f484ceb1d78f5f476"
@ -6742,6 +6797,23 @@ pupa@^2.1.1:
dependencies:
escape-goat "^2.0.0"
puppeteer@^18.0.5:
version "18.0.5"
resolved "https://registry.yarnpkg.com/puppeteer/-/puppeteer-18.0.5.tgz#873223b17b92345182c5b5e8cfbd6f3117f1547d"
integrity sha512-s4erjxU0VtKojPvF+KvLKG6OHUPw7gO2YV1dtOsoryyCbhrs444fXb4QZqGWuTv3V/rgSCUzeixxu34g0ZkSMA==
dependencies:
cross-fetch "3.1.5"
debug "4.3.4"
devtools-protocol "0.0.1036444"
extract-zip "2.0.1"
https-proxy-agent "5.0.1"
progress "2.0.3"
proxy-from-env "1.1.0"
rimraf "3.0.2"
tar-fs "2.1.1"
unbzip2-stream "1.4.3"
ws "8.8.1"
qrcode@^1.5.0:
version "1.5.0"
resolved "https://registry.yarnpkg.com/qrcode/-/qrcode-1.5.0.tgz#95abb8a91fdafd86f8190f2836abbfc500c72d1b"
@ -7166,6 +7238,13 @@ ret@~0.1.10:
dependencies:
glob "^7.1.3"
rimraf@3.0.2:
version "3.0.2"
resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a"
integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==
dependencies:
glob "^7.1.3"
ripemd160@^2.0.0, ripemd160@^2.0.1:
version "2.0.2"
resolved "https://registry.yarnpkg.com/ripemd160/-/ripemd160-2.0.2.tgz#a1c1a6f624751577ba5d07914cbc92850585890c"
@ -8021,7 +8100,7 @@ tapable@^2.1.1, tapable@^2.2.0:
resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.1.tgz#1967a73ef4060a82f12ab96af86d52fdb76eeca0"
integrity sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==
tar-fs@^2.0.0, tar-fs@^2.1.1:
tar-fs@2.1.1, tar-fs@^2.0.0, tar-fs@^2.1.1:
version "2.1.1"
resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784"
integrity sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==
@ -8355,6 +8434,14 @@ unbox-primitive@^1.0.1:
has-symbols "^1.0.2"
which-boxed-primitive "^1.0.2"
unbzip2-stream@1.4.3:
version "1.4.3"
resolved "https://registry.yarnpkg.com/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz#b0da04c4371311df771cdc215e87f2130991ace7"
integrity sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==
dependencies:
buffer "^5.2.1"
through "^2.3.8"
unc-path-regex@^0.1.2:
version "0.1.2"
resolved "https://registry.yarnpkg.com/unc-path-regex/-/unc-path-regex-0.1.2.tgz#e73dd3d7b0d7c5ed86fbac6b0ae7d8c6a69d50fa"
@ -8530,6 +8617,11 @@ use@^3.1.0:
resolved "https://registry.yarnpkg.com/use/-/use-3.1.1.tgz#d50c8cac79a19fbc20f2911f56eb973f4e10070f"
integrity sha512-cwESVXlO3url9YWlFW/TA9cshCEhtu7IKJ/p5soJ/gGpj7vbvFrAY/eIioQ6Dw23KjZhYgiIo8HOs1nQ2vr/oQ==
user-agent@^1.0.4:
version "1.0.4"
resolved "https://registry.yarnpkg.com/user-agent/-/user-agent-1.0.4.tgz#61201431fc7e84ea4a5e1e76392f163a1539c9a4"
integrity sha512-NPTnJ89e6ttUK+Q3ZQ6aMFo4+4HAdvsb39IypyRw/bPjE/F8TjeVpB8uqFPnUCVbI6247qPryd8OLpkEYuOwWg==
util-deprecate@^1.0.1, util-deprecate@~1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
@ -9076,6 +9168,11 @@ write-file-atomic@^3.0.0:
signal-exit "^3.0.2"
typedarray-to-buffer "^3.1.5"
ws@8.8.1:
version "8.8.1"
resolved "https://registry.yarnpkg.com/ws/-/ws-8.8.1.tgz#5dbad0feb7ade8ecc99b830c1d77c913d4955ff0"
integrity sha512-bGy2JzvzkPowEJV++hF07hAD6niYSr0JzBNo/J29WsB57A2r7Wlc1UFcTR9IzrPvuNVO4B8LGqF8qcpsVOhJCA==
ws@^8.2.3:
version "8.3.0"
resolved "https://registry.yarnpkg.com/ws/-/ws-8.3.0.tgz#7185e252c8973a60d57170175ff55fdbd116070d"
@ -9231,7 +9328,7 @@ yargs@^7.1.0:
y18n "^3.2.1"
yargs-parser "^5.0.1"
"yauzl@2.9.2 - 2.10.0":
"yauzl@2.9.2 - 2.10.0", yauzl@^2.10.0:
version "2.10.0"
resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9"
integrity sha1-x+sXyT4RLLEIb6bY5R+wZnt5pfk=

Loading…
Cancel
Save