You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

137 lines
4.4 KiB

// media/job/webpage-screenshot.js
// Copyright (C) 2022 DTP Technologies, LLC
// License: Apache-2.0
'use strict';
const path = require('path');
const fs = require('fs');
const mongoose = require('mongoose');
const puppeteer = require('puppeteer');
const UserAgent = require('user-agents');
const { SiteWorkerProcess } = require(path.join(__dirname, '..', '..', '..', '..', 'lib', 'site-lib'));
/**
 * Worker process that captures a screenshot of a web page with Puppeteer,
 * stores it through the site's image service, and writes the resulting image
 * id into a field of a Mongoose document.
 *
 * Jobs named 'webpage-screenshot' are consumed from the 'media' job queue.
 */
class WebpageScreenshotJob extends SiteWorkerProcess {

  /**
   * Component descriptor handed to the SiteWorkerProcess constructor.
   */
  static get COMPONENT ( ) {
    return {
      logId: 'wrk:chat:webpage-screenshot:job',
      index: 'webpageScreenshotJob',
      className: 'WebpageScreenshotJob',
    };
  }

  /**
   * @param {*} worker the owning worker, passed through to SiteWorkerProcess
   */
  constructor (worker) {
    super(worker, WebpageScreenshotJob.COMPONENT);
  }

  /**
   * Creates the screenshot work directory, launches the Puppeteer browser and
   * registers the 'webpage-screenshot' processor on the 'media' queue.
   */
  async start ( ) {
    await super.start();

    const workDirectory = path.join(
      process.env.DTP_IMAGE_WORK_PATH,
      'webpage-screenshot',
    );
    await fs.promises.mkdir(workDirectory, { recursive: true });

    this.log.info('starting Puppeteer browser engine');
    this.browser = await puppeteer.launch();

    this.queue = await this.getJobQueue('media');
    this.log.info('registering job processor', { queue: this.queue.name, name: 'webpage-screenshot' });
    // The second argument is the processor concurrency (Bull semantics, per the
    // job type documented on processWebpageScreenshot).
    this.queue.process('webpage-screenshot', 1, this.processWebpageScreenshot.bind(this));
  }

  /**
   * Shuts down the Puppeteer browser (if one was started) and then stops the
   * base worker process.
   */
  async stop ( ) {
    if (this.browser) {
      this.log.info('stopping Puppeteer browser engine');
      const browser = this.browser;
      delete this.browser;
      try {
        // Await the shutdown so the browser process is really gone before
        // super.stop() runs, and so a rejection is not left unhandled.
        await browser.close();
      } catch (error) {
        // Best effort: a browser that fails to close must not prevent the
        // rest of the worker from stopping.
        this.log.error('failed to stop Puppeteer browser engine', { error });
      }
    }
    await super.stop();
  }

  /**
   * Captures a JPEG screenshot of job.data.pageUrl and stores the created
   * image's _id at job.data.documentPath on the target document.
   *
   * Expected job data parameters:
   * - modelName: name of the Mongoose model that owns the document
   * - documentId: _id of the document to update
   * - documentPath: path within the document that receives the image _id
   * - ownerId: _id passed to the image service as the image owner
   * - pageUrl: URL of the page to capture
   * - viewport (optional): overrides for the default 720x600 @ 1.0 viewport
   * - fullPage (optional): truthy to capture the full scrollable page
   *
   * NOTE(review): pageUrl is handed to the browser unvalidated. If job data
   * can carry untrusted input, consider restricting it to http(s) URLs.
   *
   * @param {Job} job the Bull Queue job to be processed
   * @throws {Error} when the model or document cannot be found, or when any
   * step of capturing/storing the screenshot fails (the error is logged and
   * rethrown so the queue can mark the job as failed)
   */
  async processWebpageScreenshot (job) {
    const { image: imageService } = this.dtp.services;
    const { modelName, documentId, documentPath, ownerId, pageUrl } = job.data;

    const model = mongoose.model(modelName);
    if (!model) {
      throw new Error(`Invalid model name specified for document: ${modelName}`);
    }

    const imageFilename = path.join(process.env.DTP_IMAGE_WORK_PATH, 'webpage-screenshot', `${documentId}.jpg`);
    this.log.info('job received to capture webpage screenshot', { modelName, documentId, pageUrl });

    // Caller-supplied viewport values win over the defaults.
    job.data.viewport = Object.assign({
      width: 720,
      height: 600,
      deviceScaleFactor: 1.0,
    }, job.data.viewport);

    try {
      job.data.document = await model.findById(documentId);
      if (!job.data.document) {
        throw new Error(`document not found: ${modelName}:${documentId}`);
      }

      this.log.info('Opening web page', { modelName, documentId, pageUrl });
      job.page = await this.browser.newPage();
      if (!job.page) {
        // The Error constructor's second argument only honours `cause`, so the
        // job context is reported by the catch block's log entry instead.
        throw new Error('failed to create new browser page for capturing screenshot');
      }

      const userAgent = new UserAgent();
      await job.page.setUserAgent(userAgent.toString());
      await job.page.setViewport(job.data.viewport);
      await job.page.goto(pageUrl, { waitUntil: 'networkidle2' });

      this.jobLog(job, 'capturing screenshot to file');
      await job.page.screenshot({
        path: imageFilename,
        type: 'jpeg',
        quality: 85,
        fullPage: job.data.fullPage || false,
      });

      this.jobLog(job, 'uploading screenshot to storage and database');
      const outFileStat = await fs.promises.stat(imageFilename);
      const imageDefinition = { };
      const imageFile = {
        path: imageFilename,
        mimetype: 'image/jpeg',
        size: outFileStat.size,
      };
      job.data.screenshotImage = await imageService.create({ _id: ownerId }, imageDefinition, imageFile);

      this.jobLog(job, 'updating document with screenshot image');
      const updateOp = { $set: { [documentPath]: job.data.screenshotImage._id } };
      await model.updateOne({ _id: documentId }, updateOp);

      this.jobLog(job, 'screenshot captured and processed successfully');
    } catch (error) {
      this.log.error('failed to process webpage screenshot', { modelName, documentId, pageUrl, error });
      throw error;
    } finally {
      // Each cleanup step is isolated so that a failure in one neither skips
      // the other nor replaces the outcome (success or error) of the try block.
      if (job.page) {
        try {
          if (!job.page.isClosed()) {
            this.log.info('closing browser page after capturing screenshot', { modelName, documentId, pageUrl });
            await job.page.close();
          }
        } catch (error) {
          this.log.error('failed to close browser page', { modelName, documentId, pageUrl, error });
        }
        delete job.page;
      }

      this.log.info('removing temp screenshot file', { imageFilename });
      try {
        // force: true makes a missing file (e.g. the screenshot was never written) a no-op.
        await fs.promises.rm(imageFilename, { force: true });
      } catch (error) {
        this.log.error('failed to remove temp screenshot file', { imageFilename, error });
      }
    }
  }
}
module.exports = WebpageScreenshotJob;