fix(server): use job queue instead of event on doc indexing changes (#12893)

close CLOUD-231



#### PR Dependency Tree


* **PR #12893** 👈

This tree was auto-generated by
[Charcoal](https://github.com/danerwilliams/charcoal)

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **Refactor**
  - Updated background processing for document indexing and deletion to
    use a job queue system instead of event-based triggers.
- **Bug Fixes**
  - Improved reliability of embedding updates and deletions by ensuring
    tasks are properly queued and processed.
- **Tests**
  - Adjusted tests to verify that document operations correctly trigger
    job queue actions.
  
No changes to user-facing features or interface.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
fengmk2 2025-06-23 14:56:04 +08:00 committed by GitHub
parent 705d2e9bbe
commit 862a50c982
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 30 additions and 27 deletions

View File

@ -159,8 +159,10 @@ export class CopilotEmbeddingJob {
} }
} }
@OnEvent('doc.indexer.updated') @OnJob('copilot.embedding.updateDoc')
async addDocEmbeddingQueueFromEvent(doc: Events['doc.indexer.updated']) { async addDocEmbeddingQueueFromEvent(
doc: Jobs['copilot.embedding.updateDoc']
) {
if (!this.supportEmbedding || !this.embeddingClient) return; if (!this.supportEmbedding || !this.embeddingClient) return;
await this.queue.add( await this.queue.add(
@ -176,8 +178,10 @@ export class CopilotEmbeddingJob {
); );
} }
@OnEvent('doc.indexer.deleted') @OnJob('copilot.embedding.deleteDoc')
async deleteDocEmbeddingQueueFromEvent(doc: Events['doc.indexer.deleted']) { async deleteDocEmbeddingQueueFromEvent(
doc: Jobs['copilot.embedding.deleteDoc']
) {
await this.queue.remove( await this.queue.remove(
`workspace:embedding:${doc.workspaceId}:${doc.docId}`, `workspace:embedding:${doc.workspaceId}:${doc.docId}`,
'copilot.embedding.docs' 'copilot.embedding.docs'

View File

@ -43,6 +43,16 @@ declare global {
docId: string; docId: string;
}; };
'copilot.embedding.updateDoc': {
workspaceId: string;
docId: string;
};
'copilot.embedding.deleteDoc': {
workspaceId: string;
docId: string;
};
'copilot.embedding.files': { 'copilot.embedding.files': {
contextId?: string; contextId?: string;
userId: string; userId: string;

View File

@ -1884,12 +1884,12 @@ test('should delete doc work', async t => {
t.is(result4.nodes.length, 1); t.is(result4.nodes.length, 1);
t.deepEqual(result4.nodes[0].fields.docId, [docId2]); t.deepEqual(result4.nodes[0].fields.docId, [docId2]);
const count = module.event.count('doc.indexer.deleted'); const count = module.queue.count('copilot.embedding.deleteDoc');
await indexerService.deleteDoc(workspaceId, docId1, { await indexerService.deleteDoc(workspaceId, docId1, {
refresh: true, refresh: true,
}); });
t.is(module.event.count('doc.indexer.deleted'), count + 1); t.is(module.queue.count('copilot.embedding.deleteDoc'), count + 1);
// make sure the docId1 is deleted // make sure the docId1 is deleted
result1 = await indexerService.search({ result1 = await indexerService.search({
@ -2044,7 +2044,7 @@ test('should list doc ids work', async t => {
// #region indexDoc() // #region indexDoc()
test('should index doc work', async t => { test('should index doc work', async t => {
const count = module.event.count('doc.indexer.updated'); const count = module.queue.count('copilot.embedding.updateDoc');
const docSnapshot = await module.create(Mockers.DocSnapshot, { const docSnapshot = await module.create(Mockers.DocSnapshot, {
workspaceId: workspace.id, workspaceId: workspace.id,
user, user,
@ -2110,7 +2110,7 @@ test('should index doc work', async t => {
t.snapshot( t.snapshot(
result2.nodes.map(node => omit(node.fields, ['workspaceId', 'docId'])) result2.nodes.map(node => omit(node.fields, ['workspaceId', 'docId']))
); );
t.is(module.event.count('doc.indexer.updated'), count + 1); t.is(module.queue.count('copilot.embedding.updateDoc'), count + 1);
}); });
// #endregion // #endregion

View File

@ -27,16 +27,3 @@ export class IndexerModule {}
export { IndexerService }; export { IndexerService };
export type { SearchDoc } from './types'; export type { SearchDoc } from './types';
declare global {
interface Events {
'doc.indexer.updated': {
workspaceId: string;
docId: string;
};
'doc.indexer.deleted': {
workspaceId: string;
docId: string;
};
}
}

View File

@ -2,8 +2,8 @@ import { Injectable, Logger } from '@nestjs/common';
import { camelCase, chunk, mapKeys, snakeCase } from 'lodash-es'; import { camelCase, chunk, mapKeys, snakeCase } from 'lodash-es';
import { import {
EventBus,
InvalidIndexerInput, InvalidIndexerInput,
JobQueue,
SearchProviderNotFound, SearchProviderNotFound,
} from '../../base'; } from '../../base';
import { readAllBlocksFromDocSnapshot } from '../../core/utils/blocksuite'; import { readAllBlocksFromDocSnapshot } from '../../core/utils/blocksuite';
@ -110,7 +110,7 @@ export class IndexerService {
constructor( constructor(
private readonly models: Models, private readonly models: Models,
private readonly factory: SearchProviderFactory, private readonly factory: SearchProviderFactory,
private readonly event: EventBus private readonly queue: JobQueue
) {} ) {}
async createTables() { async createTables() {
@ -285,11 +285,12 @@ export class IndexerService {
})), })),
options options
); );
this.event.emit('doc.indexer.updated', {
await this.queue.add('copilot.embedding.updateDoc', {
workspaceId, workspaceId,
docId, docId,
}); });
this.logger.debug( this.logger.log(
`synced doc ${workspaceId}/${docId} with ${result.blocks.length} blocks` `synced doc ${workspaceId}/${docId} with ${result.blocks.length} blocks`
); );
} }
@ -319,12 +320,13 @@ export class IndexerService {
}, },
options options
); );
this.logger.debug(`deleted doc ${workspaceId}/${docId}`);
await this.deleteBlocksByDocId(workspaceId, docId, options); await this.deleteBlocksByDocId(workspaceId, docId, options);
this.event.emit('doc.indexer.deleted', { await this.queue.add('copilot.embedding.deleteDoc', {
workspaceId, workspaceId,
docId, docId,
}); });
this.logger.log(`deleted doc ${workspaceId}/${docId}`);
} }
async deleteBlocksByDocId( async deleteBlocksByDocId(