5 * Amazon SES (Simple email service) forwards email spam complaints to our Zendesk instance.
6 * This script will ingest these tickets and unsubscribe any users associated with
11 * 1. Fetch all Zendesk tickets sent from AWS
12 * 1. Process the `.eml` attachment to find who(which email) sent the complaint.
13 * If no attachment is present, will look for the ARF(embedded email abuse format)
14 * format in the ticket comment body.
15 * 1. Unsubscribe any Gitter users associated with that email
16 * 1. Add a comment to the ticket with what actions took place.
17 * 1. If we were able to unsubscribe someone, will solve the ticket
18 * 1. If any error took place while processing that specific ticket, will also put that
22 const assert = require('assert');
23 const debug = require('debug')('gitter:app:chat-search-service');
24 const shutdown = require('shutdown');
25 const util = require('util');
26 const request = util.promisify(require('request'));
27 const env = require('gitter-web-env');
28 const config = env.config;
29 const logger = env.logger;
30 const userService = require('gitter-web-users');
31 const userSettingsService = require('gitter-web-user-settings');
32 const downloadFileToBuffer = require('gitter-web-matrix-bridge/lib/download-file-to-buffer');
33 const obfuscateToken = require('gitter-web-github').obfuscateToken;
34 const unsubscribeHashes = require('gitter-web-email-notifications/lib/unsubscribe-hashes');
36 require('../../server/event-listeners').install();
// Zendesk credentials are read from the `zendesk:apiKey` and `zendesk:email` config keys
38 const zendeskToken = config.get('zendesk:apiKey');
39 const zendeskEmail = config.get('zendesk:email');
// The credentials are joined as `<email>/token:<apiKey>`, base64-encoded and sent as an
// HTTP Basic `Authorization` header on every Zendesk request made by this script.
40 // https://developer.zendesk.com/api-reference/ticketing/introduction/#api-token
41 const authorizationString = `${zendeskEmail}/token:${zendeskToken}`;
42 const authorizationStringBase64 = Buffer.from(authorizationString).toString('base64');
// The encoded credentials are passed through `obfuscateToken` before being written to the debug log
43 debug('Zendesk authorizationStringBase64', obfuscateToken(authorizationStringBase64));
44 const authorizationHeader = `Basic ${authorizationStringBase64}`;
// Command-line options parsed with yargs and exposed as `opts`; `opts.dryRun` is read by the
// rest of the script to decide whether anything is actually changed.
46 const opts = require('yargs')
49 description: `If we're doing a dry-run, we won't actually unsubscribe anyone`
52 .alias('help', 'h').argv;
55 logger.info('Running as a dry-run!');
// Notice used as the prefix of the automated ticket comments written by this script.
// It additionally states that this is a dry-run when `opts.dryRun` is set.
58 const AUTOMATED_MESSAGE_NOTICE = `**Note:**${
59 opts.dryRun ? ' This is a dry-run!' : ''
60 } This is an automated message from the [scripts/utils/unsubscribe-spam-complaints.js](https://gitlab.com/gitterHQ/webapp/-/blob/develop/scripts/utils/unsubscribe-spam-complaints.js) utility script (probably ran as a cron).`;
62 // This is to make code fences/blocks in Zendesk because ``` is not supported
// Each mapped line becomes a newline followed by the line prefixed with leading whitespace
// (see the `.map` template below).
// NOTE(review): the steps that produce the lines and combine the result are not visible in
// this excerpt — presumably the input is split on newlines and re-joined; confirm.
63 function indent(inputString) {
66 .map(line => `\n ${line}`)
// Opts a single user out by writing `1` to their `unread_notifications_optout` user setting.
// Resolves with whatever `userSettingsService.setUserSettings` resolves with.
70 async function unsubscribeUserId(userId) {
71 return await userSettingsService.setUserSettings(userId, 'unread_notifications_optout', 1);
// Unsubscribes every Gitter user that `userService.findAllByEmail` returns for `email`.
// A warning is logged when no user is associated with the email; a success line is
// logged for each user that gets unsubscribed.
// NOTE(review): the return statement is not visible in this excerpt; callers read `.length`
// on the result, so it is presumably the list of unsubscribed users — confirm.
74 async function unsubscribeEmail(email) {
75 const users = await userService.findAllByEmail(email);
77 if (users.length === 0) {
78 logger.warn(`Unable to find any Gitter users associated with email=${email}`);
// NOTE(review): `users` is used like a plain array above (`.length`), so a regular
// `for...of` would presumably be enough here — confirm.
82 for await (let user of users) {
// A user object may expose its ID as either `id` or `_id`
83 const userId = user.id || user._id;
86 await unsubscribeUserId(userId);
89 `Successfully unsubscribed userId=${userId} username=${user.username} email=${email}`
// Sends an authenticated JSON request to the Zendesk ticket endpoint for `ticketId`,
// carrying `message` and, optionally, a new ticket `status`.
// NOTE(review): the request method and body are not visible in this excerpt — presumably a
// ticket update that appends `message` as a comment and applies `endStatus`; confirm.
96 async function addCommentToTicket(ticketId, message, status) {
97 let endStatus = status;
98 // We should not modify the status of the ticket on dry-runs
100 endStatus = undefined;
103 const addCommentRes = await request({
105 uri: `https://gitter.zendesk.com/api/v2/tickets/${ticketId}`,
108 'Content-Type': 'application/json',
109 Authorization: authorizationHeader
// Any response other than HTTP 200 is reported as a failure, including the response body
122 if (addCommentRes.statusCode !== 200) {
124 `addCommentToTicket failed ticketId=${ticketId}, statusCode=${
125 addCommentRes.statusCode
126 }, body=${JSON.stringify(addCommentRes.body)}`
// Leaves a comment on the Zendesk ticket listing every user that was unsubscribed
// and asks for the ticket to be moved to the `solved` status.
//
// `ticketId` - ID of the Zendesk ticket to update
// `unsubscribedUsers` - non-empty array of user objects exposing `username` and
//   either `id` or `_id`
//
// Rejects with an assertion error when `ticketId` is missing or `unsubscribedUsers`
// is missing/empty, and with whatever `addCommentToTicket` rejects with.
async function updateTicketWithUnsubscribedUsers(ticketId, unsubscribedUsers) {
  assert(ticketId);
  assert(unsubscribedUsers);
  assert(unsubscribedUsers.length > 0);

  const status = 'solved';

  // Render each user as `username (userId)` so the support agent can tell who was affected
  const unsubscribedUserList = unsubscribedUsers
    .map(unsubscribedUser => {
      const userId = unsubscribedUser.id || unsubscribedUser._id;
      return `${unsubscribedUser.username} (${userId})`;
    })
    .join(', ');

  // Successfully unsubscribed some users
  const message = `${AUTOMATED_MESSAGE_NOTICE}

We've unsubscribed ${unsubscribedUserList} based on this spam complaint.
`;

  await addCommentToTicket(ticketId, message, status);
}
// Downloads the single `.eml` attachment found on the ticket comment and returns its
// contents as a string.
//
// `ticketId` - ID of the Zendesk ticket (only used in error messages)
// `comment` - Zendesk comment object; its `attachments` entries are read for
//   `file_name` and `content_url`
//
// Rejects when the comment does not carry exactly one `.eml` attachment, or when the
// download fails.
async function _getReportedEmailContentsFromCommentAttachments(ticketId, comment) {
  assert(ticketId);
  assert(comment);

  // Treat a missing attachment list like an empty one so we fail with the descriptive
  // error below instead of a TypeError
  const attachments = comment.attachments || [];

  const emailAttachments = attachments.filter(attachment => {
    return attachment.file_name.endsWith('.eml');
  });

  // If there are more(or less) than 1 .eml attachment, we might process it wrong
  // because we only expect the reported `.eml` to be present.
  if (emailAttachments.length !== 1) {
    // Serialize the attachments, otherwise they are rendered as `[object Object]`
    throw new Error(
      `Expected 1 .eml attachment for ticketId=${ticketId} but received ${
        emailAttachments.length
      } attachments=${JSON.stringify(attachments)}`
    );
  }

  const emailAttachment = emailAttachments[0];

  const data = await downloadFileToBuffer(emailAttachment.content_url);

  return String(data.buffer);
}
176 // eslint-disable-next-line
178 Here is an example of what we are trying to process.
179 It's called the Abuse Feedback Reporting Format (ARF)
180 but really it just seems like the reported .eml we sent in another .eml.
183 To: Gitter Notifications <support@gitter.im>
185 Content-Type: multipart/report; report-type=feedback-report;
186 boundary="feedback_part_610bfbf9_23c25613_42c9adea"
189 --feedback_part_610bfbf9_23c25613_42c9adea
190 Content-Type: text/plain; charset="US-ASCII"
191 Content-Transfer-Encoding: 7bit
193 This is an email abuse report for an email message received from IP
194 XX.XXX.XX.XXX on Thu, 05 Aug 2021 09:10:04 +0800. For more information
195 about this format please see http://www.mipassoc.org/arf/.
197 --feedback_part_610bfbf9_23c25613_42c9adea
198 Content-Type: message/feedback-report
201 User-Agent: mail.qq.com
203 Original-Rcpt-To: <xxx@qq.com>
205 --feedback_part_610bfbf9_23c25613_42c9adea
206 Content-Type: message/rfc822
207 Content-Disposition: inline
209 Received: from XX.XXX.XX.XXX (unknown [XX.XXX.XX.XXX])
210 by newmx32.qq.com (NewMx) with SMTP id
211 for <xxx@qq.com>; Thu, 05 Aug 2021 09:10:06 +0800
213 Content-Type: multipart/alternative;
214 boundary="--_NmP-3e1543c0fceea401-Part_1"
215 From: Gitter Notifications <support@gitter.im>
218 ----_NmP-3e1543c0fceea401-Part_1
219 Content-Type: text/plain; charset=utf-8
220 Content-Transfer-Encoding: quoted-printable
224 This is what you missed while you were away.
225 [Text email version...]
228 ----_NmP-3e1543c0fceea401-Part_1
229 Content-Type: text/html; charset=utf-8
230 [HTML email version...]
232 ----_NmP-3e1543c0fceea401-Part_1--
235 --feedback_part_610bfbf9_23c25613_42c9adea--
// Extracts the reported email from a ticket comment whose body is a raw ARF report.
// The first `boundary="..."` declaration found in the comment body is used to split the
// body into pieces, and the piece containing a `Content-Type: message/rfc822` line is
// picked out as the reported email.
// Throws when no boundary declaration is found in the comment body.
238 async function _getReportedEmailContentsFromCommentBody(ticketId, comment) {
242 // Process the Abuse Feedback Reporting Format (ARF), see the comment above for an example
243 const firstBoundaryMatches = comment.body.match(/^\s+boundary="(.*?)"$/m);
245 if (!firstBoundaryMatches) {
246 throw new Error('Unable to find boundary markers in ARF comment body');
249 const boundaryMarker = firstBoundaryMatches[1];
// Inside the body, each piece is introduced by the declared boundary prefixed with `--`
250 const boundarySplit = `--${boundaryMarker}`;
251 const arfPieces = comment.body.split(boundarySplit);
// A single piece means the boundary never occurred in the body, so there is nothing to pick from
253 if (arfPieces.length <= 1) {
255 `Expected ARF from comment body to be made up of multiple pieces (most likely 4) but found ${arfPieces.length} pieces split up by \`${boundarySplit}\`.`
259 // Find the arf piece which has `Content-Type: message/rfc822` in it.
260 // This will be like `.eml` attachment we are used to in `getReportedEmailContentsFromCommentAttachments`
261 const eml = arfPieces.find(arfPiece => {
262 return arfPiece.match(/^Content-Type: message\/rfc822$/m);
// Returns the contents of the email that was reported as spam for the given ticket comment,
// with quoted-printable soft line breaks removed.
//
// The `.eml` attachment on the comment is tried first; when that does not yield anything,
// the comment body itself is parsed as a raw ARF report.
//
// `ticketId` - ID of the Zendesk ticket being processed
// `comment` - Zendesk comment object of the spam complaint
//
// Rejects with an error describing both failed attempts when neither source yields
// the reported email.
async function getReportedEmailContentsFromComment(ticketId, comment) {
  assert(ticketId);
  assert(comment);

  let reportedEmailContents;
  let checkCommentAttachmentsError;
  try {
    // First lets check if the spam complaint has a `.eml` attachment with the reported email in question
    reportedEmailContents = await _getReportedEmailContentsFromCommentAttachments(
      ticketId,
      comment
    );
  } catch (err) {
    // Remember the failure so it can be reported if the fallback does not work either
    checkCommentAttachmentsError = err;
  }

  let checkInlineCommentError;
  if (!reportedEmailContents) {
    try {
      // Fallback to trying to parse the report directly in the comment itself.
      // The ticket comment itself might be a raw ARF format.
      reportedEmailContents = await _getReportedEmailContentsFromCommentBody(ticketId, comment);
    } catch (err) {
      checkInlineCommentError = err;
    }
  }

  if (!reportedEmailContents) {
    throw new Error(`
Unable to get reported email contents from this spam complaint.
We checked the attachments on this ticket but ran into this problem:

${checkCommentAttachmentsError && indent(checkCommentAttachmentsError.stack)}

We also checked the ticket comment itself but weren't able to see or parse an ARF format from it:

${checkInlineCommentError && indent(checkInlineCommentError.stack)}
`);
  }

  // Unwrap "Content-Transfer-Encoding: quoted-printable" text which has
  // lines soft-wrapped at 76 characters and split up with `=` followed by a line break.
  //
  // > The Quoted-Printable encoding REQUIRES that encoded lines be no
  // > more than 76 characters long. If longer lines are to be encoded
  // > with the Quoted-Printable encoding, 'soft' line breaks must be
  // > used. An equal sign as the last character on a encoded line
  // > indicates such a non-significant ('soft') line break in the encoded
  // > text.
  // >
  // > https://www.w3.org/Protocols/rfc1341/5_Content-Transfer-Encoding.html
  //
  // Email line breaks are CRLF on the wire, so both `=\r\n` and `=\n` are unwrapped;
  // otherwise long values such as the unsubscribe link stay split across lines.
  const unwrappedEmailContents = reportedEmailContents.replace(/=\r?\n/g, '');

  return unwrappedEmailContents;
}
324 // Look for the /unsubscribe link in the email and decipher it to find the userId.
325 // ex. https://gitter.im/settings/unsubscribe/5cd788edba69ca1604f1536d71eb5aed540cd87cc3d4c21ee5a7ecfbf852987c459c26fe127f20cf9eca2fb2d2fc1262f
// When a link is found, the user it points at is unsubscribed and the ticket is updated with
// that user; the message at the bottom is used when no link is present in the email.
326 async function _unsubscribeUsersBasedOnUnsubscribeHashInEmail(ticketId, reportedEmailContents) {
328 assert(reportedEmailContents);
// Only a link wrapped in double quotes is matched; the hash is everything between the
// `/unsubscribe/` path and the closing quote
330 const unsubscribeHashMatches = reportedEmailContents.match(
331 /"https:\/\/gitter.im\/settings\/unsubscribe\/(.*?)"/m
334 if (unsubscribeHashMatches) {
335 const hash = unsubscribeHashMatches[1];
336 const { userId } = unsubscribeHashes.decipherHash(hash);
338 await unsubscribeUserId(userId);
// The user is looked up after unsubscribing so the ticket comment can name them
// NOTE(review): no visible line handles `findById` returning nothing — confirm
340 const user = await userService.findById(userId);
342 await updateTicketWithUnsubscribedUsers(ticketId, [user]);
345 `Unable to find the https://gitter.im/settings/unsubscribe/xxx link in the reported .eml for ticketId=${ticketId}`
// Reads the first `To:` header line in the reported email, unsubscribes every user that
// `unsubscribeEmail` finds for it and updates the ticket with those users.
// Throws when the reported email has no `To:` line.
// NOTE(review): the whole captured header value is handed to `unsubscribeEmail`; if the
// header can be in `Display Name <address>` form the lookup would presumably find nobody — confirm.
350 async function _unsubscribeUsersBasedOnToField(ticketId, reportedEmailContents) {
352 assert(reportedEmailContents);
354 const emailMatches = reportedEmailContents.match(/^To: (.*?)$/m);
357 const email = emailMatches[1];
358 const unsubscribedUsers = await unsubscribeEmail(email);
// Nobody matched the address: the message below explains that the ticket needs a manual review
360 if (!unsubscribedUsers.length) {
362 `Unable to find any Gitter users associated with this spam complaint. You probably just want to close this ticket but we've left it open for you to review.`
366 await updateTicketWithUnsubscribedUsers(ticketId, unsubscribedUsers);
368 throw new Error(`Unable to find the To: field in the reported .eml for ticketId=${ticketId}`);
// Unsubscribes whoever the reported email points at, trying two strategies in order:
// the /unsubscribe hash embedded in the email, then the `To:` field of the email.
// The second strategy only runs when the first one failed; when both fail, the message
// at the bottom reports the stack of each failure.
372 async function unsubscribeUsersFromReportedEmailContents(ticketId, reportedEmailContents) {
374 assert(reportedEmailContents);
376 let checkUnsubscribeHashError;
378 // First check for a possible Gitter /unsubscribe hash in the email and try using that
379 await _unsubscribeUsersBasedOnUnsubscribeHashInEmail(ticketId, reportedEmailContents);
381 checkUnsubscribeHashError = err;
384 let checkToFieldError;
385 if (checkUnsubscribeHashError) {
387 // Not all emails have the /unsubscribe link so fallback
388 // to checking the To: field (where the email was sent to).
389 // This isn't 100% reliable though as we sometimes find emails
390 // where the email isn't associated with any of our Gitter users
391 // (probably some internal email rewriting routing).
392 await _unsubscribeUsersBasedOnToField(ticketId, reportedEmailContents);
394 checkToFieldError = err;
398 if (checkUnsubscribeHashError && checkToFieldError) {
400 Unable to find anyone to unsubscribe from this spam complaint.
401 We checked the for an /unsubscribe hash but ran into this problem:
403 ${indent(checkUnsubscribeHashError.stack)}
405 We also checked for any users associated with the email defined in the To: field but ran into this problem:
407 ${indent(checkToFieldError.stack)}
// Collects the IDs of every open Zendesk ticket requested by the Amazon SES complaint
// address, following the search API's `next_page` links until the last page is reached.
//
// Resolves with a flat array of ticket IDs (newest ticket first).
// Rejects when any page responds with something other than HTTP 200.
async function fetchSpamComplaintTicketIds() {
  // https://developer.zendesk.com/api-reference/ticketing/ticket-management/search/
  const query = `type:ticket status:open requester:complaints@email-abuse.amazonses.com`;
  // `sort_by` only accepts a single field name
  const firstPageUrl = `https://gitter.zendesk.com/api/v2/search.json?query=${encodeURIComponent(
    query
  )}&sort_by=created_at&sort_order=desc`;

  const ticketIds = [];
  let pageCount = 0;
  let url = firstPageUrl;

  // Pages are fetched one after the other because each response tells us where the
  // next page lives; every page is awaited so only plain IDs end up in the result.
  while (url) {
    const ticketSearchRes = await request({
      method: 'GET',
      uri: url,
      json: true,
      headers: {
        'Content-Type': 'application/json',
        Authorization: authorizationHeader
      }
    });

    if (ticketSearchRes.statusCode !== 200) {
      throw new Error(
        `fetchSpamComplaintTicketIds failed to fetch tickets, pageCount=${pageCount} statusCode=${
          ticketSearchRes.statusCode
        }, body=${JSON.stringify(ticketSearchRes.body)}`
      );
    }

    for (const searchResult of ticketSearchRes.body.results) {
      ticketIds.push(searchResult.id);
    }

    pageCount += 1;
    // `next_page` is empty on the last page, which ends the loop
    url = ticketSearchRes.body.next_page;
  }

  return ticketIds;
}
// Fetches the spam complaint ticket IDs and processes the tickets one at a time:
// the ticket's comments are loaded, the reported email is extracted from the first
// comment and the users it points at are unsubscribed.
// When a ticket fails, the error is logged and also written to that ticket as a comment.
456 async function processSpamComplaints() {
457 logger.info('Fetching spam complaint tickets');
458 const spamComplaintTicketIds = await fetchSpamComplaintTicketIds();
459 logger.info('spamComplaintTicketIds:', spamComplaintTicketIds.join(', '));
461 for await (let ticketId of spamComplaintTicketIds) {
463 const ticketCommentGetRes = await request({
465 uri: `https://gitter.zendesk.com/api/v2/tickets/${ticketId}/comments`,
468 'Content-Type': 'application/json',
469 Authorization: authorizationHeader
473 if (ticketCommentGetRes.statusCode !== 200) {
475 `Failed to fetch comments for ticket, ticketId=${ticketId} statusCode=${
476 ticketCommentGetRes.statusCode
477 }, body=${JSON.stringify(ticketCommentGetRes.body)}`
// Only the first comment of the ticket is inspected
481 const comment = ticketCommentGetRes.body.comments[0];
482 const reportedEmailContents = await getReportedEmailContentsFromComment(ticketId, comment);
484 await unsubscribeUsersFromReportedEmailContents(ticketId, reportedEmailContents);
486 // Log the error and move on to the next ticket
487 const errorMessage = `Failed to process ticketId=${ticketId}: ${err.stack}`;
488 logger.error(errorMessage);
// No status is passed here, so only the comment is added to the ticket
489 await addCommentToTicket(ticketId, `${AUTOMATED_MESSAGE_NOTICE}\n\n${errorMessage}`);
// Script entry point: process every spam complaint, give background work time to finish,
// then shut the process down.
496 await processSpamComplaints();
497 logger.info(`Done handling spam complaints`);
499 // wait 5 seconds to allow for asynchronous `event-listeners` to finish
500 // This isn't clean but works
501 // https://github.com/troupe/gitter-webapp/issues/580#issuecomment-147445395
502 // https://gitlab.com/gitterHQ/webapp/merge_requests/1605#note_222861592
503 logger.info(`Waiting 5 seconds to allow for the asynchronous \`event-listeners\` to finish...`);
504 await new Promise(resolve => setTimeout(resolve, 5000));
// NOTE(review): the failure is logged at `info` level; `logger.error` is presumably what
// was intended here — confirm.
506 logger.info('Error', err.stack);
508 shutdown.shutdownGracefully();