// media/job/webpage-screenshot.js
// Copyright (C) 2022 DTP Technologies, LLC
// License: Apache-2.0

'use strict';

const path = require('path');
const fs = require('fs');

const mongoose = require('mongoose');
const puppeteer = require('puppeteer');
const userAgent = require('user-agent');

const { SiteWorkerProcess } = require(path.join(__dirname, '..', '..', '..', '..', 'lib', 'site-lib'));

/**
 * Worker process that captures a screenshot of a web page with Puppeteer,
 * stores it through the image service, and writes the resulting image id
 * onto a target document.
 */
class WebpageScreenshotJob extends SiteWorkerProcess {

  /**
   * Component descriptor passed to the SiteWorkerProcess constructor.
   */
  static get COMPONENT ( ) {
    return {
      logId: 'webpage-screenshot-job',
      index: 'webpageScreenshotJob',
      className: 'WebpageScreenshotJob',
    };
  }

  /**
   * @param {object} worker the owning worker, forwarded to SiteWorkerProcess
   */
  constructor (worker) {
    super(worker, WebpageScreenshotJob.COMPONENT);
  }

  /**
   * Creates the screenshot work directory, launches the Puppeteer browser,
   * and registers the 'webpage-screenshot' processor on the 'media' queue
   * with a concurrency of 1.
   */
  async start ( ) {
    await super.start();

    const workDirectory = path.join(
      process.env.DTP_IMAGE_WORK_PATH,
      'webpage-screenshot',
    );
    await fs.promises.mkdir(workDirectory, { recursive: true });

    this.log.info('starting Puppeteer browser engine');
    this.browser = await puppeteer.launch();

    this.queue = await this.getJobQueue('media');

    this.log.info('registering job processor', { queue: this.queue.name, name: 'webpage-screenshot' });
    this.queue.process('webpage-screenshot', 1, this.processWebpageScreenshot.bind(this));
  }

  /**
   * Closes the Puppeteer browser (if one was started) and then stops the
   * base worker process. The browser shutdown is awaited so it has finished
   * before super.stop() runs, and super.stop() runs even if the shutdown
   * fails.
   */
  async stop ( ) {
    try {
      if (this.browser) {
        this.log.info('stopping Puppeteer browser engine');
        try {
          // Awaited so a failed shutdown surfaces here instead of becoming
          // an unhandled promise rejection.
          await this.browser.close();
        } finally {
          delete this.browser;
        }
      }
    } finally {
      await super.stop();
    }
  }

  /**
   * Captures a JPEG screenshot of job.data.pageUrl, stores it through the
   * image service, and sets job.data.documentPath on the target document to
   * the id of the created image.
   *
   * Job data parameters read by this method: modelName, documentId,
   * documentPath, ownerId, pageUrl, and optionally viewport (merged over a
   * 720x600 @ 1.0 default) and fullPage (defaults to false).
   *
   * @param {Job} job the Bull Queue job to be processed
   * @throws {Error} when the document is not found, or when any capture,
   * upload, or update step fails (the error is logged and rethrown).
   */
  async processWebpageScreenshot (job) {
    const { image: imageService } = this.dtp.services;
    const { modelName, documentId, documentPath, ownerId, pageUrl } = job.data;

    const model = mongoose.model(modelName);
    // NOTE(review): mongoose.model() is expected to throw for an unregistered
    // model name, so this guard is likely defensive only - confirm.
    if (!model) {
      throw new Error(`Invalid model name specified for document: ${modelName}`);
    }

    const imageFilename = path.join(process.env.DTP_IMAGE_WORK_PATH, 'webpage-screenshot', `${documentId}.jpg`);

    this.log.info('job received to capture webpage screenshot', { modelName, documentId, pageUrl });

    // Caller-supplied viewport values override the defaults.
    job.data.viewport = Object.assign({
      width: 720,
      height: 600,
      deviceScaleFactor: 1.0,
    }, job.data.viewport);

    try {
      job.data.document = await model.findById(documentId);
      if (!job.data.document) {
        throw new Error(`document not found: ${modelName}:${documentId}`);
      }

      this.log.info('Opening web page', { modelName, documentId, pageUrl });
      job.page = await this.browser.newPage();
      if (!job.page) {
        // The Error constructor only reads `cause` from its options argument,
        // so no context object is passed here; the catch below logs context.
        throw new Error('failed to create new browser page for capturing screenshot');
      }

      // NOTE(review): confirm that toString() on the `user-agent` module
      // export yields a usable User-Agent header value.
      await job.page.setUserAgent(userAgent.toString());
      await job.page.setViewport(job.data.viewport);
      await job.page.goto(pageUrl, { waitUntil: 'networkidle2' });

      this.jobLog(job, 'capturing screenshot to file');
      await job.page.screenshot({
        path: imageFilename,
        type: 'jpeg',
        quality: 85,
        fullPage: job.data.fullPage || false,
      });

      this.jobLog(job, 'uploading screenshot to storage and database');
      const outFileStat = await fs.promises.stat(imageFilename);
      const imageDefinition = { };
      const imageFile = {
        path: imageFilename,
        mimetype: 'image/jpeg',
        size: outFileStat.size,
      };
      job.data.screenshotImage = await imageService.create({ _id: ownerId }, imageDefinition, imageFile);

      this.jobLog(job, 'updating document with screenshot image');
      const updateOp = { $set: { [documentPath]: job.data.screenshotImage._id } };
      await model.updateOne({ _id: documentId }, updateOp);

      this.jobLog(job, 'screenshot captured and processed successfully');
    } catch (error) {
      this.log.error('failed to process webpage screenshot', { modelName, documentId, pageUrl, error });
      throw error;
    } finally {
      try {
        if (job.page && !job.page.isClosed()) {
          this.log.info('closing browser page after capturing screenshot', { modelName, documentId, pageUrl });
          await job.page.close();
        }
      } finally {
        // Runs even when closing the page fails, so the temp screenshot
        // never lingers in the work directory.
        delete job.page;
        this.log.info('removing temp screenshot file', { imageFilename });
        await fs.promises.rm(imageFilename, { force: true });
      }
    }
  }
}

module.exports = WebpageScreenshotJob;