Przeglądaj źródła

Most viewed queries performance optimization (#20)

* Most viewed queries performance optimization

* PR FIX

* Fix most viewed args description

* PR FIX 2

* improve code readability (#1)

Co-authored-by: Klaudiusz Dembler <accounts@kdembler.com>
Rafał Pawłow 3 lata temu
rodzic
commit
7112506b05
6 zmienionych plików: 178 dodań i 115 usunięć
  1. 1 0
      .gitignore
  2. 24 9
      schema.graphql
  3. 101 6
      src/aggregates/views.ts
  4. 37 85
      src/resolvers/viewsInfo.ts
  5. 9 9
      tests/queries/views.ts
  6. 6 6
      tests/views.test.ts

+ 1 - 0
.gitignore

@@ -7,3 +7,4 @@ dist/
 globalConfig.json
 
 .idea
+db/

+ 24 - 9
schema.graphql

@@ -46,22 +46,37 @@ type Query {
   """Get list of most followed channels of all time"""
   mostFollowedChannelsAllTime(limit: Int!): [ChannelFollowsInfo!]
 
-  """Get most viewed list of categories"""
-  mostViewedCategories(limit: Int, period: Int!): [EntityViewsInfo!]
+  """Get list of most viewed categories in a given time period"""
+  mostViewedCategories(
+    limit: Int
 
-  """Get most viewed list of categories of all time"""
+    """timePeriodDays must take one of the following values: 7, 30"""
+    timePeriodDays: Int!
+  ): [EntityViewsInfo!]
+
+  """Get list of most viewed categories of all time"""
   mostViewedCategoriesAllTime(limit: Int!): [EntityViewsInfo!]
 
-  """Get most viewed list of channels"""
-  mostViewedChannels(limit: Int, period: Int!): [EntityViewsInfo!]
+  """Get list of most viewed channels in a given time period"""
+  mostViewedChannels(
+    limit: Int
+
+    """timePeriodDays must take one of the following values: 7, 30"""
+    timePeriodDays: Int!
+  ): [EntityViewsInfo!]
 
-  """Get most viewed list of channels of all time"""
+  """Get list of most viewed channels of all time"""
   mostViewedChannelsAllTime(limit: Int!): [EntityViewsInfo!]
 
-  """Get most viewed list of videos"""
-  mostViewedVideos(limit: Int, period: Int!): [EntityViewsInfo!]
+  """Get list of most viewed videos in a given time period"""
+  mostViewedVideos(
+    limit: Int
+
+    """timePeriodDays must take one of the following values: 7, 30"""
+    timePeriodDays: Int!
+  ): [EntityViewsInfo!]
 
-  """Get most viewed list of videos of all time"""
+  """Get list of most viewed videos of all time"""
   mostViewedVideosAllTime(limit: Int!): [EntityViewsInfo!]
 
   """Get views count for a single video"""

+ 101 - 6
src/aggregates/views.ts

@@ -1,23 +1,96 @@
 import { UnsequencedVideoEvent, VideoEvent, VideoEventsBucketModel, VideoEventType } from '../models/VideoEvent'
 import { EntityViewsInfo } from '../entities/EntityViewsInfo'
+import { mapPeriods } from '../resolvers/viewsInfo'
+import { differenceInCalendarDays } from 'date-fns'
 
 type VideoEventsAggregationResult = {
   events?: VideoEvent[]
 }[]
 
+type TimePeriodEventsData = {
+  sevenDays: Partial<UnsequencedVideoEvent>[]
+  thirtyDays: Partial<UnsequencedVideoEvent>[]
+}
+
+type TimePeriodViews = {
+  sevenDays: EntityViewsInfo[]
+  thirtyDays: EntityViewsInfo[]
+}
+
 export class ViewsAggregate {
   private videoViewsMap: Record<string, number> = {}
   private channelViewsMap: Record<string, number> = {}
   private categoryViewsMap: Record<string, number> = {}
+
+  private timePeriodEvents: TimePeriodEventsData = {
+    sevenDays: [],
+    thirtyDays: [],
+  }
+
+  private timePeriodVideoViews: TimePeriodViews = {
+    sevenDays: [],
+    thirtyDays: [],
+  }
+
+  private timePeriodChannelViews: TimePeriodViews = {
+    sevenDays: [],
+    thirtyDays: [],
+  }
+
+  private timePeriodCategoryViews: TimePeriodViews = {
+    sevenDays: [],
+    thirtyDays: [],
+  }
+
   private allViewsEvents: Partial<UnsequencedVideoEvent>[] = []
   private allVideoViews: EntityViewsInfo[] = []
   private allChannelViews: EntityViewsInfo[] = []
   private allCategoryViews: EntityViewsInfo[] = []
 
-  private addOrUpdateViews(array: EntityViewsInfo[], id: string): void {
-    const i = array.findIndex((element) => element.id === id)
-    if (i > -1) array[i].views = array[i].views + 1
-    else array.push({ id, views: 1 })
+  private addOrUpdateViews(array: EntityViewsInfo[], id: string, shouldAdd = true): void {
+    const viewsObject = array.find((element) => element.id === id)
+
+    if (viewsObject) {
+      if (!viewsObject.views && !shouldAdd) return
+
+      if (shouldAdd) {
+        viewsObject.views++
+      } else {
+        viewsObject.views--
+      }
+    } else {
+      array.push({ id, views: shouldAdd ? 1 : 0 })
+    }
+
+    array.sort((a, b) => (a.views > b.views ? -1 : 1))
+  }
+
+  public filterEventsByPeriod(timePeriodDays: number) {
+    const mappedPeriod = mapPeriods(timePeriodDays)
+    const viewEvents = this.timePeriodEvents[mappedPeriod]
+
+    // find index of first event that should be kept
+    const firstEventToIncludeIdx = viewEvents.findIndex(
+      (view) => view.timestamp && differenceInCalendarDays(new Date(), view.timestamp) <= timePeriodDays
+    )
+
+    // update views with all of the events that should be removed
+    for (let i = 0; i < firstEventToIncludeIdx; i++) {
+      const { videoId, channelId, categoryId } = viewEvents[i]
+
+      if (videoId) {
+        this.addOrUpdateViews(this.timePeriodVideoViews[mappedPeriod], videoId, false)
+      }
+      if (channelId) {
+        this.addOrUpdateViews(this.timePeriodChannelViews[mappedPeriod], channelId, false)
+      }
+      if (categoryId) {
+        this.addOrUpdateViews(this.timePeriodCategoryViews[mappedPeriod], categoryId, false)
+      }
+    }
+
+    // remove older events
+    this.timePeriodEvents[mappedPeriod] = viewEvents.slice(firstEventToIncludeIdx)
   }
 
   public videoViews(videoId: string): number | null {
@@ -52,6 +125,18 @@ export class ViewsAggregate {
     return this.allCategoryViews
   }
 
+  public getTimePeriodVideoViews() {
+    return this.timePeriodVideoViews
+  }
+
+  public getTimePeriodChannelViews() {
+    return this.timePeriodChannelViews
+  }
+
+  public getTimePeriodCategoryViews() {
+    return this.timePeriodCategoryViews
+  }
+
   public static async Build() {
     const aggregation: VideoEventsAggregationResult = await VideoEventsBucketModel.aggregate([
       { $unwind: '$events' },
@@ -69,7 +154,8 @@ export class ViewsAggregate {
   }
 
   public applyEvent(event: UnsequencedVideoEvent) {
-    const { videoId, channelId, categoryId, timestamp, type } = event
+    const { type, ...eventWithoutType } = event
+    const { videoId, channelId, categoryId } = eventWithoutType
     const currentVideoViews = videoId ? this.videoViewsMap[videoId] || 0 : 0
     const currentChannelViews = channelId ? this.channelViewsMap[channelId] || 0 : 0
     const currentCategoryViews = categoryId ? this.categoryViewsMap[categoryId] || 0 : 0
@@ -77,18 +163,27 @@ export class ViewsAggregate {
     switch (type) {
       case VideoEventType.AddView:
         if (videoId) {
+          this.addOrUpdateViews(this.timePeriodVideoViews.sevenDays, videoId)
+          this.addOrUpdateViews(this.timePeriodVideoViews.thirtyDays, videoId)
           this.addOrUpdateViews(this.allVideoViews, videoId)
           this.videoViewsMap[videoId] = currentVideoViews + 1
         }
         if (channelId) {
+          this.addOrUpdateViews(this.timePeriodChannelViews.sevenDays, channelId)
+          this.addOrUpdateViews(this.timePeriodChannelViews.thirtyDays, channelId)
           this.addOrUpdateViews(this.allChannelViews, channelId)
           this.channelViewsMap[channelId] = currentChannelViews + 1
         }
         if (categoryId) {
+          this.addOrUpdateViews(this.timePeriodCategoryViews.sevenDays, categoryId)
+          this.addOrUpdateViews(this.timePeriodCategoryViews.thirtyDays, categoryId)
           this.addOrUpdateViews(this.allCategoryViews, categoryId)
           this.categoryViewsMap[categoryId] = currentCategoryViews + 1
         }
-        this.allViewsEvents = [...this.allViewsEvents, { videoId, channelId, categoryId, timestamp }]
+
+        this.timePeriodEvents.sevenDays.push(eventWithoutType)
+        this.timePeriodEvents.thirtyDays.push(eventWithoutType)
+
         break
       default:
         console.error(`Parsing unknown video event: ${type}`)

+ 37 - 85
src/resolvers/viewsInfo.ts

@@ -1,11 +1,10 @@
 import { Args, ArgsType, Ctx, Field, ID, Int, Mutation, Query, Resolver } from 'type-graphql'
-import { Min, Max } from 'class-validator'
-import { differenceInCalendarDays } from 'date-fns'
+import { Min, Max, IsIn } from 'class-validator'
 import { EntityViewsInfo } from '../entities/EntityViewsInfo'
 import { saveVideoEvent, VideoEventType, UnsequencedVideoEvent } from '../models/VideoEvent'
 import { OrionContext } from '../types'
 
-const MAXIMUM_PERIOD = 30
+export const mapPeriods = (period: number) => (period === 7 ? 'sevenDays' : 'thirtyDays')
 
 @ArgsType()
 class VideoViewsArgs {
@@ -20,22 +19,12 @@ class BatchedVideoViewsArgs {
 }
 
 @ArgsType()
-class MostViewedVideosArgs {
-  @Field(() => Int)
-  @Min(1)
-  @Max(MAXIMUM_PERIOD)
-  period: number
-
-  @Field(() => Int, { nullable: true })
-  limit?: number
-}
-
-@ArgsType()
-class MostViewedChannelArgs {
-  @Field(() => Int)
-  @Min(1)
-  @Max(MAXIMUM_PERIOD)
-  period: number
+class MostViewedArgs {
+  @Field(() => Int, {
+    description: 'timePeriodDays must take one of the following values: 7, 30',
+  })
+  @IsIn([7, 30])
+  timePeriodDays: number
 
   @Field(() => Int, { nullable: true })
   limit?: number
@@ -53,17 +42,6 @@ class BatchedChannelViewsArgs {
   channelIdList: string[]
 }
 
-@ArgsType()
-class MostViewedCategoriesArgs {
-  @Field(() => Int)
-  @Min(1)
-  @Max(MAXIMUM_PERIOD)
-  period: number
-
-  @Field(() => Int, { nullable: true })
-  limit?: number
-}
-
 @ArgsType()
 class AddVideoViewArgs {
   @Field(() => ID)
@@ -99,52 +77,64 @@ export class VideoViewsInfosResolver {
     return videoIdList.map((videoId) => getVideoViewsInfo(videoId, ctx))
   }
 
-  @Query(() => [EntityViewsInfo], { nullable: true, description: 'Get most viewed list of videos' })
+  @Query(() => [EntityViewsInfo], {
+    nullable: true,
+    description: 'Get list of most viewed videos in a given time period',
+  })
   async mostViewedVideos(
-    @Args() { period, limit }: MostViewedVideosArgs,
+    @Args() { timePeriodDays, limit }: MostViewedArgs,
     @Ctx() ctx: OrionContext
   ): Promise<EntityViewsInfo[]> {
-    return mapMostViewedArray(buildMostViewedVideosArray(ctx, period), limit)
+    ctx.viewsAggregate.filterEventsByPeriod(timePeriodDays)
+    return limitViews(ctx.viewsAggregate.getTimePeriodVideoViews()[mapPeriods(timePeriodDays)], limit)
   }
 
-  @Query(() => [EntityViewsInfo], { nullable: true, description: 'Get most viewed list of videos of all time' })
+  @Query(() => [EntityViewsInfo], { nullable: true, description: 'Get list of most viewed videos of all time' })
   async mostViewedVideosAllTime(
     @Args() { limit }: MostViewedAllTimeArgs,
     @Ctx() ctx: OrionContext
   ): Promise<EntityViewsInfo[]> {
-    return sortAndLimitViews(ctx.viewsAggregate.getAllVideoViews(), limit)
+    return limitViews(ctx.viewsAggregate.getAllVideoViews(), limit)
   }
 
-  @Query(() => [EntityViewsInfo], { nullable: true, description: 'Get most viewed list of channels' })
+  @Query(() => [EntityViewsInfo], {
+    nullable: true,
+    description: 'Get list of most viewed channels in a given time period',
+  })
   async mostViewedChannels(
-    @Args() { period, limit }: MostViewedChannelArgs,
+    @Args() { timePeriodDays, limit }: MostViewedArgs,
     @Ctx() ctx: OrionContext
   ): Promise<EntityViewsInfo[]> {
-    return mapMostViewedArray(buildMostViewedChannelsArray(ctx, period), limit)
+    ctx.viewsAggregate.filterEventsByPeriod(timePeriodDays)
+    return limitViews(ctx.viewsAggregate.getTimePeriodChannelViews()[mapPeriods(timePeriodDays)], limit)
   }
 
-  @Query(() => [EntityViewsInfo], { nullable: true, description: 'Get most viewed list of channels of all time' })
+  @Query(() => [EntityViewsInfo], { nullable: true, description: 'Get list of most viewed channels of all time' })
   async mostViewedChannelsAllTime(
     @Args() { limit }: MostViewedAllTimeArgs,
     @Ctx() ctx: OrionContext
   ): Promise<EntityViewsInfo[]> {
-    return sortAndLimitViews(ctx.viewsAggregate.getAllChannelViews(), limit)
+    return limitViews(ctx.viewsAggregate.getAllChannelViews(), limit)
   }
 
-  @Query(() => [EntityViewsInfo], { nullable: true, description: 'Get most viewed list of categories' })
+  @Query(() => [EntityViewsInfo], {
+    nullable: true,
+    description: 'Get list of most viewed categories in a given time period',
+  })
   async mostViewedCategories(
-    @Args() { period, limit }: MostViewedCategoriesArgs,
+    @Args() { timePeriodDays, limit }: MostViewedArgs,
     @Ctx() ctx: OrionContext
   ): Promise<EntityViewsInfo[]> {
-    return mapMostViewedArray(buildMostViewedCategoriesArray(ctx, period), limit)
+    ctx.viewsAggregate.filterEventsByPeriod(timePeriodDays)
+    return limitViews(ctx.viewsAggregate.getTimePeriodCategoryViews()[mapPeriods(timePeriodDays)], limit)
   }
 
-  @Query(() => [EntityViewsInfo], { nullable: true, description: 'Get most viewed list of categories of all time' })
+  @Query(() => [EntityViewsInfo], { nullable: true, description: 'Get list of most viewed categories of all time' })
   async mostViewedCategoriesAllTime(
     @Args() { limit }: MostViewedAllTimeArgs,
     @Ctx() ctx: OrionContext
   ): Promise<EntityViewsInfo[]> {
-    return sortAndLimitViews(ctx.viewsAggregate.getAllCategoryViews(), limit)
+    return limitViews(ctx.viewsAggregate.getAllCategoryViews(), limit)
   }
 
   @Query(() => EntityViewsInfo, { nullable: true, description: 'Get views count for a single channel' })
@@ -184,48 +174,10 @@ export class VideoViewsInfosResolver {
   }
 }
 
-const mapMostViewedArray = (views: Record<string, number>, limit?: number) =>
-  views
-    ? Object.keys(views)
-        .map((id) => ({ id, views: views[id] }))
-        .sort((a, b) => (a.views > b.views ? -1 : 1))
-        .slice(0, limit)
-    : []
-
-const sortAndLimitViews = (views: EntityViewsInfo[], limit: number) => {
-  return views.sort((a, b) => (a.views > b.views ? -1 : 1)).slice(0, limit)
-}
-
-const filterAllViewsByPeriod = (ctx: OrionContext, period: number): Partial<UnsequencedVideoEvent>[] => {
-  const views = ctx.viewsAggregate.getAllViewsEvents()
-  const filteredViews = []
-
-  for (let i = views.length - 1; i >= 0; i--) {
-    const { timestamp } = views[i]
-    if (timestamp && differenceInCalendarDays(new Date(), timestamp) > period) break
-    filteredViews.push(views[i])
-  }
-
-  return filteredViews
+const limitViews = (views: EntityViewsInfo[], limit?: number) => {
+  return views.slice(0, limit)
 }
 
-const buildMostViewedVideosArray = (ctx: OrionContext, period: number) =>
-  filterAllViewsByPeriod(ctx, period).reduce(
-    (entity: Record<string, number>, { videoId = '' }) => ({ ...entity, [videoId]: (entity[videoId] || 0) + 1 }),
-    {}
-  )
-
-const buildMostViewedChannelsArray = (ctx: OrionContext, period: number) =>
-  filterAllViewsByPeriod(ctx, period).reduce(
-    (entity: Record<string, number>, { channelId = '' }) => ({ ...entity, [channelId]: (entity[channelId] || 0) + 1 }),
-    {}
-  )
-
-const buildMostViewedCategoriesArray = (ctx: OrionContext, period: number) =>
-  filterAllViewsByPeriod(ctx, period).reduce((entity: Record<string, number>, { categoryId }) => {
-    return categoryId ? { ...entity, [categoryId]: (entity[categoryId] || 0) + 1 } : entity
-  }, {})
-
 const buildViewsObject = (id: string, views: number | null): EntityViewsInfo | null => {
   if (views != null) {
     return {

+ 9 - 9
tests/queries/views.ts

@@ -11,8 +11,8 @@ export const GET_VIDEO_VIEWS = gql`
 `
 
 export const GET_MOST_VIEWED_VIDEOS = gql`
-  query GetMostViewedVideos($period: Int!) {
-    mostViewedVideos(period: $period) {
+  query GetMostViewedVideos($timePeriodDays: Int!) {
+    mostViewedVideos(timePeriodDays: $timePeriodDays) {
       id
       views
     }
@@ -37,7 +37,7 @@ export type GetVideoViewsArgs = {
   videoId: string
 }
 export type GetMostViewedVideosArgs = {
-  period: number
+  timePeriodDays: number
 }
 export type GetMostViewedVideosAllTimeArgs = {
   limit: number
@@ -56,8 +56,8 @@ export const GET_CHANNEL_VIEWS = gql`
 `
 
 export const GET_MOST_VIEWED_CHANNELS = gql`
-  query GetMostViewedChannels($period: Int!) {
-    mostViewedChannels(period: $period) {
+  query GetMostViewedChannels($timePeriodDays: Int!) {
+    mostViewedChannels(timePeriodDays: $timePeriodDays) {
       id
       views
     }
@@ -82,7 +82,7 @@ export type GetChannelViewsArgs = {
   channelId: string
 }
 export type GetMostViewedChannelsArgs = {
-  period: number
+  timePeriodDays: number
 }
 export type GetMostViewedChannelsAllTimeArgs = {
   limit: number
@@ -92,8 +92,8 @@ export type GetMostViewedChannelsAllTime = {
 }
 
 export const GET_MOST_VIEWED_CATEGORIES = gql`
-  query GetMostViewedCategories($period: Int!) {
-    mostViewedCategories(period: $period) {
+  query GetMostViewedCategories($timePeriodDays: Int!) {
+    mostViewedCategories(timePeriodDays: $timePeriodDays) {
       id
       views
     }
@@ -112,7 +112,7 @@ export type GetMostViewedCategories = {
   mostViewedCategories: EntityViewsInfo[]
 }
 export type GetMostViewedCategoriesArgs = {
-  period: number
+  timePeriodDays: number
 }
 export type GetMostViewedCategoriesAllTimeArgs = {
   limit: number

+ 6 - 6
tests/views.test.ts

@@ -83,10 +83,10 @@ describe('Video and channel views resolver', () => {
     return videoViewsResponse.data?.videoViews
   }
 
-  const getMostViewedVideos = async (period: number) => {
+  const getMostViewedVideos = async (timePeriodDays: number) => {
     const mostViewedVideosResponse = await query<GetMostViewedVideos, GetMostViewedVideosArgs>({
       query: GET_MOST_VIEWED_VIDEOS,
-      variables: { period },
+      variables: { timePeriodDays },
     })
     expect(mostViewedVideosResponse.errors).toBeUndefined()
     return mostViewedVideosResponse.data?.mostViewedVideos
@@ -110,10 +110,10 @@ describe('Video and channel views resolver', () => {
     return channelViewsResponse.data?.channelViews
   }
 
-  const getMostViewedChannels = async (period: number) => {
+  const getMostViewedChannels = async (timePeriodDays: number) => {
     const mostViewedChannelsResponse = await query<GetMostViewedChannels, GetMostViewedChannelsArgs>({
       query: GET_MOST_VIEWED_CHANNELS,
-      variables: { period },
+      variables: { timePeriodDays },
     })
     expect(mostViewedChannelsResponse.errors).toBeUndefined()
     return mostViewedChannelsResponse.data?.mostViewedChannels
@@ -131,10 +131,10 @@ describe('Video and channel views resolver', () => {
     return mostViewedChannelsAllTimeResponse.data?.mostViewedChannelsAllTime
   }
 
-  const getMostViewedCategories = async (period: number) => {
+  const getMostViewedCategories = async (timePeriodDays: number) => {
     const mostViewedCategoriesResponse = await query<GetMostViewedCategories, GetMostViewedCategoriesArgs>({
       query: GET_MOST_VIEWED_CATEGORIES,
-      variables: { period },
+      variables: { timePeriodDays },
     })
     expect(mostViewedCategoriesResponse.errors).toBeUndefined()
     return mostViewedCategoriesResponse.data?.mostViewedCategories