Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"@notionhq/client": "^2.2.3",
"@sentry/node": "^7.50.0",
"@temporalio/client": "^1.11.3",
"@togethercrew.dev/db": "^3.2.3",
"@togethercrew.dev/db": "^3.3.0",
"@togethercrew.dev/tc-messagebroker": "^0.0.50",
"@types/express-session": "^1.17.7",
"@types/morgan": "^1.9.5",
Expand Down
5 changes: 4 additions & 1 deletion src/docs/module.doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,8 @@ paths:
name:
type: string
description: Name of the platform.
enum: ['discord', 'google', 'github', 'notion', 'mediaWiki', 'discourse']
enum:
['discord', 'google', 'github', 'notion', 'mediaWiki', 'discourse', 'telegram', 'website']
metadata:
type: object
description: Metadata specific to the module and platform. Varies depending on the platform name and module name.
Expand Down Expand Up @@ -236,6 +237,8 @@ paths:
items:
type: string
description: Metadata for the hivemind module on MediaWiki.
- type: object
description: Metadata for the hivemind module on website.
- type: object
properties:
selectedEmails:
Expand Down
15 changes: 14 additions & 1 deletion src/docs/platform.doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ paths:
- mediaWiki
- discourse
- telegram
- website
description: Name of the platform to create. Must be one of the supported platforms.
community:
type: string
Expand Down Expand Up @@ -194,7 +195,17 @@ paths:
properties:
chat:
type: object
description: Metadata for Telegram.
description: Metadata for Website.
- type: object
required: [resources]
properties:
resources:
type: array
items:
type: string
format: uri
description: Metadata for Website.

responses:
'201':
description: Platform created successfully.
Expand Down Expand Up @@ -227,6 +238,8 @@ paths:
- github
- notion
- mediaWiki
- telegram
- website
- in: query
name: community
schema:
Expand Down
2 changes: 2 additions & 0 deletions src/services/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import platformService from './platform.service';
import reputationScoreService from './reputationScore.service';
import sagaService from './saga.service';
import telegramService from './telegram';
import websiteService from './temporal/website.service';
import tokenService from './token.service';
import twitterService from './twitter.service';
import userService from './user.service';
Expand All @@ -36,4 +37,5 @@ export {
discourseService,
telegramService,
reputationScoreService,
websiteService,
};
17 changes: 11 additions & 6 deletions src/services/module.service.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import { FilterQuery, HydratedDocument, Types } from 'mongoose';
import { FilterQuery, HydratedDocument, ObjectId, Types } from 'mongoose';

import { IModule, IModuleUpdateBody, Module } from '@togethercrew.dev/db';
import { IModule, IModuleUpdateBody, Module, ModuleNames, PlatformNames } from '@togethercrew.dev/db';

import platformService from './platform.service';
import websiteService from './website';

/**
* Create a module
Expand Down Expand Up @@ -60,7 +63,6 @@ const updateModule = async (
module: HydratedDocument<IModule>,
updateBody: Partial<IModuleUpdateBody>,
): Promise<HydratedDocument<IModule>> => {
// Check if `options.platforms` is in the updateBody
if (updateBody.options && updateBody.options.platforms) {
if (updateBody.options.platforms[0].name == undefined) {
{
Expand All @@ -69,15 +71,18 @@ const updateModule = async (
else module.options?.platforms.push(updateBody.options.platforms[0]);
}
} else {
// Iterate through each platform in the incoming update
for (const newPlatform of updateBody.options.platforms) {
const existingPlatform = module.options?.platforms.find((p) => p.name === newPlatform.name);
if (existingPlatform) {
// If the platform already exists, update it
existingPlatform.metadata = newPlatform.metadata;
} else {
// If the platform does not exist, add new
module.options?.platforms.push(newPlatform);
if (module.name === ModuleNames.Hivemind && newPlatform.name === PlatformNames.Website) {
const scheduleId = await websiteService.coreService.createWebsiteSchedule(newPlatform.platform);
const platform = await platformService.getPlatformById(newPlatform.platform);
platform?.set('metadata.scheduleId', scheduleId);
await platform?.save();
}
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions src/services/platform.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,8 @@ function getMetadataKey(platformName: string): string {
return 'baseURL';
case PlatformNames.Discourse:
return 'id';
case PlatformNames.Website:
return 'resources';
default:
throw new Error('Unsupported platform');
}
Expand Down Expand Up @@ -210,6 +212,7 @@ const managePlatformConnection = async (
const activePlatformOtherCommunity = await Platform.findOne({
community: { $ne: communityId },
[`metadata.${metadataKey}`]: metadataId,
name: platformData.name,
});

if (activePlatformOtherCommunity) {
Expand Down
7 changes: 6 additions & 1 deletion src/services/temporal/discourse.service.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import parentLogger from 'src/config/logger';

import { CalendarSpec, Client, ScheduleHandle, ScheduleOverlapPolicy } from '@temporalio/client';
import { TemporalCoreService } from './core.service';

import config from '../../config';
import { TemporalCoreService } from './core.service';

const logger = parentLogger.child({ module: 'DiscourseTemporalService' });

class TemporalDiscourseService extends TemporalCoreService {
public async createSchedule(platformId: string, endpoint: string): Promise<ScheduleHandle> {
Expand Down
64 changes: 64 additions & 0 deletions src/services/temporal/website.service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { Types } from 'mongoose';
import parentLogger from 'src/config/logger';

import { CalendarSpec, Client, ScheduleHandle, ScheduleOverlapPolicy } from '@temporalio/client';

import config from '../../config';
import { TemporalCoreService } from './core.service';

const logger = parentLogger.child({ module: 'WebsiteTemporalService' });

class TemporalWebsiteService extends TemporalCoreService {
public async createSchedule(platformId: Types.ObjectId): Promise<ScheduleHandle> {
const initiationTime = new Date();
const dayNumber = initiationTime.getUTCDay();
const hour = initiationTime.getUTCHours();
const minute = initiationTime.getUTCMinutes();
const DAY_NAMES = ['SUNDAY', 'MONDAY', 'TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY'] as const;
const dayOfWeek = DAY_NAMES[dayNumber];

const calendarSpec: CalendarSpec = {
dayOfWeek,
hour,
minute,
comment: `Weekly schedule for ${dayOfWeek} at ${hour}:${minute} UTC`,
};

try {
const client: Client = await this.getClient();

return client.schedule.create({
scheduleId: `website/${platformId}`,
spec: {
calendars: [calendarSpec],
},
action: {
type: 'startWorkflow',
workflowType: 'WebsiteIngestionSchedulerWorkflow',
args: [{ platformId }],
taskQueue: config.temporal.heavyQueue,
},
policies: {
catchupWindow: '1 day',
overlap: ScheduleOverlapPolicy.SKIP,
},
});
} catch (error) {
throw new Error(`Failed to create or update website ingestion schedule: ${(error as Error).message}`);
}
}

public async pauseSchedule(scheduleId: string): Promise<void> {
const client: Client = await this.getClient();
const handle = client.schedule.getHandle(scheduleId);
await handle.pause();
}

public async deleteSchedule(scheduleId: string): Promise<void> {
const client: Client = await this.getClient();
const handle = client.schedule.getHandle(scheduleId);
await handle.delete();
}
}

export default new TemporalWebsiteService();
33 changes: 33 additions & 0 deletions src/services/website/core.service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { Types } from 'mongoose';

import parentLogger from '../../config/logger';
import { ApiError } from '../../utils';
import temporalWebsite from '../temporal/website.service';

const logger = parentLogger.child({ module: 'WebsiteCoreService' });

async function createWebsiteSchedule(platformId: Types.ObjectId): Promise<string> {
try {
const schedule = await temporalWebsite.createSchedule(platformId);
logger.info(`Started schedule '${schedule.scheduleId}'`);
await schedule.trigger();
return schedule.scheduleId;
} catch (error) {
logger.error(error, 'Failed to trigger website schedule.');
throw new ApiError(590, 'Failed to create website schedule.');
}
}

async function deleteWebsiteSchedule(scheduleId: string): Promise<void> {
try {
await temporalWebsite.deleteSchedule(scheduleId);
} catch (error) {
logger.error(error, 'Failed to delete website schedule.');
throw new ApiError(590, 'Failed to delete website schedule.');
}
}

export default {
createWebsiteSchedule,
deleteWebsiteSchedule,
};
5 changes: 5 additions & 0 deletions src/services/website/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import coreService from './core.service';

export default {
coreService,
};
15 changes: 12 additions & 3 deletions src/validations/module.validation.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import Joi from 'joi';
import { objectId } from './custom.validation';

import {
PlatformNames,
ModuleNames,
HivemindPlatformNames,
ModuleNames,
PlatformNames,
ViolationDetectionPlatformNames,
} from '@togethercrew.dev/db';

import { objectId } from './custom.validation';

const createModule = {
body: Joi.object().keys({
name: Joi.string()
Expand Down Expand Up @@ -77,6 +79,9 @@ const hivemindMediaWikiMetadata = () => {
});
};

const websiteMediaWikiMetadata = () => {
return Joi.object().keys({});
};
Comment on lines +82 to +84
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Consider enhancing website metadata validation.

The empty object schema for website metadata might be too permissive. Consider adding validation for essential website-related fields such as:

  • Base URL
  • Scraping configuration
  • Rate limiting parameters

const hivemindOptions = () => {
return Joi.object().keys({
platforms: Joi.array().items(
Expand Down Expand Up @@ -107,6 +112,10 @@ const hivemindOptions = () => {
is: PlatformNames.MediaWiki,
then: hivemindMediaWikiMetadata(),
},
{
is: PlatformNames.Website,
then: websiteMediaWikiMetadata(),
},
],
otherwise: Joi.any().forbidden(),
}).required(),
Expand Down
30 changes: 29 additions & 1 deletion src/validations/platform.validation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ const discordUpdateMetadata = () => {
analyzerStartedAt: Joi.date(),
});
};

const websiteUpdateMetadata = () => {
return Joi.object().keys({
resources: Joi.array().items(Joi.string().uri({ scheme: ['http', 'https'] })),
});
};
Comment on lines +32 to +36
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Enhance website resource validation with additional safeguards.

While URI validation is good, consider adding:

  1. Rate limiting parameters to prevent aggressive scraping
  2. Allowed domains validation to prevent unauthorized access
  3. Maximum number of resources limit

Example enhancement:

 const websiteMetadata = () => {
   return Joi.object().keys({
     resources: Joi.array()
       .items(Joi.string().uri({ scheme: ['http', 'https'] }))
+      .max(100) // Prevent excessive resource lists
       .required(),
+    rateLimit: Joi.object().keys({
+      requestsPerMinute: Joi.number().min(1).max(60).required(),
+      concurrency: Joi.number().min(1).max(10).required()
+    }).required(),
+    allowedDomains: Joi.array().items(Joi.string().domain()).required()
   });
 };

Also applies to: 101-107

const twitterMetadata = () => {
return Joi.object().keys({
id: Joi.string().required(),
Expand Down Expand Up @@ -92,6 +98,14 @@ const discourseMetadata = () => {
});
};

const websiteMetadata = () => {
return Joi.object().keys({
resources: Joi.array()
.items(Joi.string().uri({ scheme: ['http', 'https'] }))
.required(),
});
};

const createPlatform = {
body: Joi.object().keys({
name: Joi.string()
Expand Down Expand Up @@ -130,7 +144,11 @@ const createPlatform = {
},
{
is: PlatformNames.Telegram,
then: telegramMetadata,
then: telegramMetadata(),
},
{
is: PlatformNames.Website,
then: websiteMetadata(),
},
],
}).required(),
Expand Down Expand Up @@ -201,6 +219,16 @@ const dynamicUpdatePlatform = (req: Request) => {
}),
};
}
case PlatformNames.Website: {
return {
params: Joi.object().keys({
platformId: Joi.required().custom(objectId),
}),
body: Joi.object().required().keys({
metadata: websiteUpdateMetadata(),
}),
};
}
default:
req.allowInput = false;
return {};
Expand Down