Arize-ai · cjunkin · May 25, 2024 · May 29, 2024 · Aug 1, 2024 · Aug 9, 2024
diff --git a/.github/workflows/typescript-CI.yaml b/.github/workflows/typescript-CI.yaml
@@ -2,7 +2,7 @@ name: Typescript CI
 
 on:
     push:
-        branches: [main, langchainjs]
+        branches: [main, llama-index-ts]
     pull_request:
         paths:
             - "js/**"

diff --git a/cspell.json b/cspell.json
@@ -2,6 +2,7 @@
     "version": "0.2",
     "language": "en",
     "words": [
+        "abramov",
         "arize",
         "arizeai",
         "autouse",
@@ -11,6 +12,7 @@
         "instrumentator",
         "Instrumentor",
         "langchain",
+        "llamaindex",
         "llms",
         "nextjs",
         "openinference",

@@ -23,7 +23,13 @@
   "license": "Apache-2.0",
   "devDependencies": {
     "@changesets/cli": "^2.27.1",
+    "@opentelemetry/exporter-trace-otlp-proto": "^0.50.0",
+    "@opentelemetry/resources": "^1.20.0",
+    "@opentelemetry/sdk-trace-base": "^1.24.1",
+    "@opentelemetry/sdk-trace-node": "^1.24.1",
+    "@opentelemetry/semantic-conventions": "^1.24.1",
     "@types/jest": "^29.5.11",
+    "@types/node": "^20.12.4",
     "@typescript-eslint/eslint-plugin": "^6.17.0",
     "@typescript-eslint/parser": "^6.17.0",
     "eslint": "^8.56.0",

@@ -1,4 +1,5 @@
 /** @type {import('ts-jest').JestConfigWithTsJest} */
+
 module.exports = {
   preset: "ts-jest",
   testEnvironment: "node",

@@ -34,13 +34,6 @@
   "devDependencies": {
     "@langchain/core": "^0.1.57",
     "@langchain/openai": "^0.0.25",
-    "@opentelemetry/exporter-trace-otlp-proto": "^0.50.0",
-    "@opentelemetry/resources": "^1.19.0",
-    "@opentelemetry/sdk-trace-base": "^1.19.0",
-    "@opentelemetry/sdk-trace-node": "^1.19.0",
-    "@opentelemetry/semantic-conventions": "^1.19.0",
-    "@types/jest": "^29.5.11",
-    "@types/node": "^20.12.4",
     "dotenv": "^16.4.5",
     "jest": "^29.7.0",
     "langchain": "^0.1.30",

@@ -0,0 +1,37 @@
+import { LlamaIndexInstrumentation } from "../src/index";
+import { ConsoleSpanExporter } from "@opentelemetry/sdk-trace-base";
+import {
+  NodeTracerProvider,
+  SimpleSpanProcessor,
+} from "@opentelemetry/sdk-trace-node";
+import { Resource } from "@opentelemetry/resources";
+import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-proto";
+import { SEMRESATTRS_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
+import { diag, DiagConsoleLogger, DiagLogLevel } from "@opentelemetry/api";
+import { registerInstrumentations } from "@opentelemetry/instrumentation";
+// Route OpenTelemetry's internal diagnostics to the console at DEBUG level,
+// which is the level to use while troubleshooting
+diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.DEBUG);
+
+// Resource attached to the tracer provider; it carries the service name
+const resource = new Resource({
+  [SEMRESATTRS_SERVICE_NAME]: "llama-index-service",
+});
+const provider = new NodeTracerProvider({ resource });
+
+// First span processor: hand finished spans to the console exporter
+const consoleExporter = new ConsoleSpanExporter();
+provider.addSpanProcessor(new SimpleSpanProcessor(consoleExporter));
+
+// Second span processor: hand finished spans to the OTLP exporter, which is
+// configured with the local endpoint below
+const otlpExporter = new OTLPTraceExporter({
+  url: "http://localhost:6006/v1/traces",
+});
+provider.addSpanProcessor(new SimpleSpanProcessor(otlpExporter));
+
+// Register the LlamaIndex instrumentation, then register the provider
+registerInstrumentations({
+  instrumentations: [new LlamaIndexInstrumentation()],
+});
+
+provider.register();
+
+// eslint-disable-next-line no-console
+console.log("👀 OpenInference initialized");
@@ -0,0 +1,30 @@
+import "./instrumentation";
+import fs from "fs/promises";
+import { VectorStoreIndex } from "llamaindex";
+import { Document } from "@llamaindex/core/dist/schema";
+
+/**
+ * Example entry point: reads the sample essay, indexes it in a
+ * VectorStoreIndex and prints the answer to a single query.
+ *
+ * The essay path is relative, so it is resolved against the current working
+ * directory of the process.
+ */
+async function main() {
+  // Load essay from abramov.txt in Node
+  const essay = await fs.readFile(
+    "node_modules/llamaindex/examples/abramov.txt",
+    "utf-8",
+  );
+
+  // Create Document object with essay
+  const document = new Document({ text: essay });
+
+  // Split text and create embeddings. Store them in a VectorStoreIndex
+  const index = await VectorStoreIndex.fromDocuments([document]);
+
+  // Query the index
+  const queryEngine = index.asQueryEngine();
+  const response = await queryEngine.query({
+    query: "What did the author do in college?",
+  });
+
+  // Output response
+  // eslint-disable-next-line no-console
+  console.log(response.toString());
+}
+
+// Handle failures explicitly instead of leaving a floating promise: log the
+// error and make the process exit with a non-zero status
+main().catch((error: unknown) => {
+  // eslint-disable-next-line no-console
+  console.error(error);
+  process.exitCode = 1;
+});
@@ -1,4 +1,5 @@
 /** @type {import('ts-jest').JestConfigWithTsJest} */
+
 module.exports = {
   preset: "ts-jest",
   testEnvironment: "node",

@@ -11,14 +11,23 @@
     "prebuild": "rimraf dist & pnpm run version:update",
     "build": "tsc --build tsconfig.json tsconfig.esm.json tsconfig.esnext.json",
     "version:update": "../../scripts/version-update.js",
-    "type:check": "tsc --noEmit"
+    "type:check": "tsc --noEmit",
+    "test": "jest"
   },
   "keywords": [],
   "author": "oss-devs@arize.com",
   "license": "Apache-2.0",
   "dependencies": {
+    "@arizeai/openinference-instrumentation-llama-index": "link:",
+    "@arizeai/openinference-semantic-conventions": "workspace:*",
     "@opentelemetry/api": "^1.7.0",
-    "@opentelemetry/instrumentation": "^0.46.0",
-    "@arizeai/openinference-semantic-conventions": "workspace:*"
+    "@opentelemetry/core": "^1.23.0",
+    "@opentelemetry/instrumentation": "^0.46.0"
+  },
+  "devDependencies": {
+    "@llamaindex/core": "^0.1.8",
+    "jest": "^29.7.0",
+    "llamaindex": "^0.5.0",
+    "openai": "^4.24.1"
   }
 }
@@ -0,0 +1 @@
+export * from "./instrumentation";
@@ -0,0 +1,131 @@
+import type * as llamaindex from "llamaindex";
+
+import {
+  InstrumentationBase,
+  InstrumentationConfig,
+  InstrumentationModuleDefinition,
+  InstrumentationNodeModuleDefinition,
+} from "@opentelemetry/instrumentation";
+import { diag } from "@opentelemetry/api";
+import {
+  isRetrieverPrototype,
+  isEmbeddingPrototype,
+  isLLMPrototype,
+} from "./utils";
+import {
+  patchQueryEngineQueryMethod,
+  patchRetrieveMethod,
+  patchQueryEmbeddingMethod,
+  patchLLMChat,
+} from "./patch";
+import { VERSION } from "./version";
+
+const MODULE_NAME = "llamaindex";
+
+/**
+ * Flag recording whether the LlamaIndex module has been patched.
+ * It is set once patching completes, cleared again on un-patch, and checked
+ * before patching so the wrappers are not applied a second time.
+ * Note: This is a fallback in case the module is made immutable (e.g. Deno, webpack, etc.)
+ */
+let _isOpenInferencePatched = false;
+
+/**
+ * Reports whether the LlamaIndex module is currently patched.
+ * @returns the current value of the module-level patched flag
+ */
+export function isPatched(): boolean {
+  return _isOpenInferencePatched;
+}
+
+/**
+ * OpenTelemetry instrumentation for the `llamaindex` package.
+ *
+ * Patching wraps `RetrieverQueryEngine.prototype.query` and then walks every
+ * export of the module, wrapping `retrieve`, `getQueryEmbedding` and `chat`
+ * on the prototypes accepted by the corresponding `./utils` type guards.
+ * Un-patching reverses the same set of wraps.
+ */
+export class LlamaIndexInstrumentation extends InstrumentationBase<
+  typeof llamaindex
+> {
+  /**
+   * @param config optional instrumentation config; it is shallow-copied
+   * before being handed to the base class
+   */
+  constructor(config?: InstrumentationConfig) {
+    super(
+      "@arizeai/openinference-instrumentation-llama-index",
+      VERSION,
+      Object.assign({}, config),
+    );
+  }
+
+  /**
+   * Patches an already-loaded `llamaindex` module directly, for cases where
+   * the module definition returned by `init` is not what applies the patch.
+   * @param module the `llamaindex` module exports to patch
+   */
+  public manuallyInstrument(module: typeof llamaindex) {
+    diag.debug(`Manually instrumenting ${MODULE_NAME}`);
+    this.patch(module);
+  }
+
+  /**
+   * Describes the module to instrument: `llamaindex` at versions `>=0.5.0`,
+   * with this instance's `patch` / `unpatch` as the hooks.
+   */
+  protected init(): InstrumentationModuleDefinition<typeof llamaindex> {
+    const module = new InstrumentationNodeModuleDefinition<typeof llamaindex>(
+      MODULE_NAME,
+      [">=0.5.0"],
+      this.patch.bind(this),
+      this.unpatch.bind(this),
+    );
+    return module;
+  }
+
+  /**
+   * Wraps the instrumented methods of the given module exports.
+   * Returns early, without wrapping, when the module-level flag says the
+   * module has already been patched.
+   * @param moduleExports the `llamaindex` module exports
+   * @param moduleVersion module version, used only in the debug log line
+   * @returns the same `moduleExports` object
+   */
+  private patch(moduleExports: typeof llamaindex, moduleVersion?: string) {
+    this._diag.debug(`Applying patch for ${MODULE_NAME}@${moduleVersion}`);
+    if (_isOpenInferencePatched) {
+      return moduleExports;
+    }
+
+    // TODO: Support streaming
+    // TODO: Generalize to QueryEngine interface (RetrieverQueryEngine, RouterQueryEngine)
+    this._wrap(
+      moduleExports.RetrieverQueryEngine.prototype,
+      "query",
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      (original): any => {
+        return patchQueryEngineQueryMethod(original, this.tracer);
+      },
+    );
+
+    for (const value of Object.values(moduleExports)) {
+      // Optional chaining so that a null / undefined export is skipped
+      // instead of throwing a TypeError in the middle of patching
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const prototype = (value as any)?.prototype;
+
+      if (isRetrieverPrototype(prototype)) {
+        this._wrap(prototype, "retrieve", (original) => {
+          return patchRetrieveMethod(original, this.tracer);
+        });
+      }
+
+      if (isEmbeddingPrototype(prototype)) {
+        this._wrap(prototype, "getQueryEmbedding", (original) => {
+          return patchQueryEmbeddingMethod(original, this.tracer);
+        });
+      }
+
+      if (isLLMPrototype(prototype)) {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        this._wrap(prototype, "chat", (original): any => {
+          return patchLLMChat(original, this.tracer);
+        });
+      }
+    }
+    _isOpenInferencePatched = true;
+    return moduleExports;
+  }
+
+  /**
+   * Removes the wrappers that `patch` installs and clears the module-level
+   * patched flag.
+   * @param moduleExports the `llamaindex` module exports
+   * @param moduleVersion module version, used only in the debug log line
+   */
+  private unpatch(moduleExports: typeof llamaindex, moduleVersion?: string) {
+    this._diag.debug(`Un-patching ${MODULE_NAME}@${moduleVersion}`);
+    this._unwrap(moduleExports.RetrieverQueryEngine.prototype, "query");
+
+    for (const value of Object.values(moduleExports)) {
+      // Same null-safe prototype lookup as in patch()
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const prototype = (value as any)?.prototype;
+
+      if (isRetrieverPrototype(prototype)) {
+        this._unwrap(prototype, "retrieve");
+      }
+
+      if (isEmbeddingPrototype(prototype)) {
+        this._unwrap(prototype, "getQueryEmbedding");
+      }
+
+      if (isLLMPrototype(prototype)) {
+        this._unwrap(prototype, "chat");
+      }
+    }
+
+    _isOpenInferencePatched = false;
+  }
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,7 +2,7 @@ name: Typescript CI @@
     on:
         push:
-            branches: [main, langchainjs]
+            branches: [main, llama-index-ts]
         pull_request:
             paths:
                 - "js/**"
@@ Expand Down @@