Skip to content

Commit

Permalink
H-3162: Look for matching entities in database when inserting entitie…
Browse files Browse the repository at this point in the history
…s from Flow (#6102)
  • Loading branch information
CiaranMn authored Jan 15, 2025
1 parent 4d81b44 commit b9a846e
Show file tree
Hide file tree
Showing 23 changed files with 1,640 additions and 292 deletions.
3 changes: 2 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@
"javascriptreact",
"typescript",
"typescriptreact"
]
],
"vitest.disableWorkspaceWarning": true
}
1 change: 1 addition & 0 deletions apps/hash-ai-worker-ts/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ The service uses the following environment variables:

### Run the worker

- To use actions which require Google Cloud Platform, you must run `gcloud auth application-default login` before starting the worker.
- Ensure the environment variables above are set, either in `.env.local` or in your shell.
- Install dependencies:
- `yarn`
Expand Down
1 change: 1 addition & 0 deletions apps/hash-ai-worker-ts/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"start:healthcheck": "wait-on --timeout 120000 http-get://localhost:4100/health",
"start:test": "NODE_ENV=test NODE_OPTIONS=--max-old-space-size=2048 node ./dist/main.js",
"start:test:healthcheck": "wait-on --timeout 120000 http-get://localhost:4100/health",
"temporal:clean": "temporal workflow terminate --query \"TaskQueue='ai'\" --reason=\"Batch terminate from CLI\"",
"test:unit": "vitest --run --exclude \"**/*.ai.test.ts\""
},
"dependencies": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ type ModelResponseArgs = {

const maximumIterations = 10;

const model: PermittedOpenAiModel = "gpt-4o";
const model: PermittedOpenAiModel = "gpt-4o-2024-08-06";

const callModel = async (
messages: OpenAI.ChatCompletionCreateParams["messages"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,11 @@ export const persistEntitiesAction: FlowActionActivity = async ({ inputs }) => {
> = {};

/**
* We could potentially parallelize the creation of (a) non-link entities and (b) link entities,
* We could potentially parallelize the creation of (a) non-link entities and then (b) link entities in batches,
* if performance of this function becomes an issue.
*
* We need to create the links after all the non-links as the ids of the links may change,
* if an existing entity is found to update rather than a new one with the localId being created.
*/
for (const unresolvedEntity of entitiesWithDependenciesSortedLast) {
const {
Expand Down Expand Up @@ -188,7 +191,9 @@ export const persistEntitiesAction: FlowActionActivity = async ({ inputs }) => {
if (!output) {
failedEntitiesByLocalId[unresolvedEntity.localEntityId] = {
proposedEntity: unresolvedEntity,
message: `No outputs returned when attempting to persist entity`,
message:
persistedEntityOutputs.message ??
`No outputs returned when attempting to persist entity`,
};
continue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ import { backOff } from "exponential-backoff";
import { getAiAssistantAccountIdActivity } from "../get-ai-assistant-account-id-activity.js";
import { extractErrorMessage } from "../infer-entities/shared/extract-validation-failure-details.js";
import { createInferredEntityNotification } from "../shared/create-inferred-entity-notification.js";
// import {
// findExistingEntity,
// findExistingLinkEntity,
// } from "../shared/find-existing-entity";
import {
findExistingEntity,
findExistingLinkEntity,
} from "../shared/find-existing-entity.js";
import { getFlowContext } from "../shared/get-flow-context.js";
import { graphApiClient } from "../shared/graph-api-client.js";
import { logProgress } from "../shared/log-progress.js";
import type { MatchedEntityUpdate } from "../shared/match-existing-entity.js";
import { createFileEntityFromUrl } from "./shared/create-file-entity-from-url.js";
import {
getEntityUpdate,
Expand Down Expand Up @@ -125,7 +126,7 @@ export const persistEntityAction: FlowActionActivity = async ({ inputs }) => {
: undefined;

let entity: Entity;
let existingEntity: Entity | undefined;
let matchedEntityUpdate: MatchedEntityUpdate<Entity> | null = null;
let operation: "create" | "update";

if (isFileEntity && fileUrl) {
Expand All @@ -143,95 +144,67 @@ export const persistEntityAction: FlowActionActivity = async ({ inputs }) => {
return {
code: StatusCode.Internal,
message: createFileEntityFromUrlStatus.message,
contents: [
{
outputs: [
{
outputName:
"persistedEntity" as OutputNameForAction<"persistEntity">,
payload: {
kind: "PersistedEntity",
value: { operation },
},
},
],
},
],
contents: [],
};
}

const { entity: updatedEntity } = createFileEntityFromUrlStatus;

entity = updatedEntity;
} else {
/**
* @todo: improve the logic for finding existing entities, to
* reduce the number of false positives.
*/
// existingEntity = await (linkData
// ? findExistingLinkEntity({
// actorId,
// graphApiClient,
// ownedById,
// linkData,
// includeDrafts: createEditionAsDraft,
// })
// : findExistingEntity({
// actorId,
// graphApiClient,
// ownedById,
// proposedEntity: proposedEntityWithResolvedLinks,
// includeDrafts: createEditionAsDraft,
// }));
matchedEntityUpdate = await (linkData
? /**
* @todo H-3883 ensure that the creation of a new link will not violate min/max links on an entity
*/
findExistingLinkEntity({
actorId,
graphApiClient,
ownedById,
linkData,
proposedEntity: proposedEntityWithResolvedLinks,
includeDrafts: createEditionAsDraft,
})
: findExistingEntity({
actorId,
graphApiClient,
ownedById,
proposedEntity: proposedEntityWithResolvedLinks,
includeDrafts: createEditionAsDraft,
}));

operation = existingEntity ? "update" : "create";
operation = matchedEntityUpdate ? "update" : "create";

try {
if (existingEntity) {
const { existingEntityIsDraft, isExactMatch, patchOperations } =
getEntityUpdate({
existingEntity,
newProperties: mergePropertyObjectAndMetadata(
properties,
undefined,
),
});

const serializedEntity = existingEntity.toJSON();

if (isExactMatch) {
return {
code: StatusCode.Ok,
contents: [
{
outputs: [
{
outputName:
"persistedEntity" as OutputNameForAction<"persistEntity">,
payload: {
kind: "PersistedEntity",
value: {
entity: serializedEntity,
existingEntity: serializedEntity,
operation: "already-exists-as-proposed",
},
},
},
],
},
],
};
}
if (matchedEntityUpdate) {
const { existingEntityIsDraft, patchOperations } = getEntityUpdate({
existingEntity: matchedEntityUpdate.existingEntity,
newPropertiesWithMetadata: mergePropertyObjectAndMetadata(
matchedEntityUpdate.newValues.properties,
matchedEntityUpdate.newValues.propertyMetadata,
),
});

/**
* In practice we don't reassign matchedEntityUpdate anywhere below it doesn't harm to make sure it will always
* be the same thing in the backOff function.
*/
const stableReferenceToMatchedEntity = matchedEntityUpdate;

entity = await backOff(
() =>
existingEntity.patch(
stableReferenceToMatchedEntity.existingEntity.patch(
graphApiClient,
{ actorId: webBotActorId },
{
...entityValues,
entityTypeIds:
stableReferenceToMatchedEntity.newValues.entityTypeIds,
draft: existingEntityIsDraft ? true : createEditionAsDraft,
propertyPatches: patchOperations,
provenance: {
...entityValues.provenance,
sources:
stableReferenceToMatchedEntity.newValues.editionSources,
},
},
),
{
Expand Down Expand Up @@ -267,30 +240,14 @@ export const persistEntityAction: FlowActionActivity = async ({ inputs }) => {
return {
code: StatusCode.Internal,
message: `Could not persist entity: ${extractErrorMessage(err)}`,
contents: [
{
outputs: [
{
outputName:
"persistedEntity" as OutputNameForAction<"persistEntity">,
payload: {
kind: "PersistedEntity",
value: {
existingEntity: existingEntity?.toJSON(),
operation,
},
},
},
],
},
],
contents: [],
};
}
}

const persistedEntity = {
entity: entity.toJSON(),
existingEntity: existingEntity?.toJSON(),
existingEntity: matchedEntityUpdate?.existingEntity.toJSON(),
operation,
} satisfies PersistedEntity;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,7 @@ const baseDirectoryPath = path.join(
test(
"Extract links form text system prompt test",
async () => {
const models: LlmParams["model"][] = [
// "claude-3-5-sonnet-20240620",
"claude-3-haiku-20240307",
// "gpt-4o",
];
const models: LlmParams["model"][] = ["claude-3-haiku-20240307"];

await optimizeSystemPrompt({
models,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { EntityId } from "@local/hash-graph-types/entity";
import { sleep } from "@local/hash-isomorphic-utils/sleep";
import dedent from "dedent";

import { logger } from "../../../shared/activity-logger.js";
Expand Down Expand Up @@ -194,6 +195,9 @@ export const deduplicateEntities = async (params: {
}

logger.error(`Error deduplicating entities: ${llmResponse.status}`);

await sleep(2_000);

return deduplicateEntities(params);
}

Expand Down
Loading

0 comments on commit b9a846e

Please sign in to comment.