Skip to content

Commit

Permalink
H-3618: Add GitHub action step to authenticate with Google Cloud, for…
Browse files Browse the repository at this point in the history
… `chonky` tests (#5687)

Co-authored-by: JesusFileto <91223391+JesusFileto@users.noreply.github.com>
Co-authored-by: Jesus Fileto <jesus_filetojr@icloud.com>
  • Loading branch information
3 people authored Dec 4, 2024
1 parent 2652f84 commit c999477
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ blocks/**/.env
.env.local
.env.*.local

# Generated credentials from google-github-actions/auth
gha-creds-*.json

# macOS directory file
**/.DS_Store

Expand Down
37 changes: 37 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ jobs:
fail-fast: false
if: needs.setup.outputs.unit-tests != '{"package":[],"include":[]}'
runs-on: ubuntu-24.04
permissions:
# Required to fetch an OIDC token, used to auth with Google Cloud Platform for @rust/chonky tests
id-token: "write"
# Maintain permission to read repo contents
contents: "read"
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand Down Expand Up @@ -148,6 +153,15 @@ jobs:
rm -rf $temp_dir
echo "PDFIUM_DYNAMIC_LIB_PATH=$(pwd)/${{ matrix.directory }}/libs/" >> $GITHUB_ENV
# Sets GOOGLE_APPLICATION_CREDENTIALS in the environment, to be consumed by gcloud or client libraries
- name: Generate Google Cloud credential configuration
if: matrix.package == '@rust/chonky'
uses: google-github-actions/auth@6fc4af4b145ae7821d527454aa9bd537d1f2dc5f # v2.1.7
with:
project_id: ${{ secrets.GOOGLE_CLOUD_HASH_PROJECT_ID }}
service_account: ${{ secrets.GOOGLE_CLOUD_VERTEX_SERVICE_ACCOUNT }}
workload_identity_provider: ${{ secrets.GOOGLE_CLOUD_IDENTITY_PROVIDER }}

- name: Install Rust toolchain
if: always() && steps.tests.outputs.has-rust == 'true'
uses: ./.github/actions/install-rust-toolchain
Expand Down Expand Up @@ -178,6 +192,10 @@ jobs:
continue-on-error: ${{ steps.tests.outputs.allow-failure == 'true' }}
env:
TEST_COVERAGE: ${{ github.event_name != 'merge_group' }}
# Variables needed for chonky tests
GOOGLE_PROJECT_ID: ${{ secrets.GOOGLE_CLOUD_HASH_PROJECT_ID }}
GOOGLE_DEFAULT_CREDENTIALS: ${{ env.GOOGLE_DEFAULT_CREDENTIALS }} # set by google-github-actions/auth
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
run: |
turbo run test:unit --env-mode=loose --filter "${{ matrix.package }}"
echo "TRIMMED_PACKAGE_NAME=$(echo "${{ matrix.package }}" | sed 's|@||g' | sed 's|/|.|g')" >> $GITHUB_ENV
Expand Down Expand Up @@ -225,6 +243,11 @@ jobs:
fail-fast: false
if: needs.setup.outputs.integration-tests != '{"package":[],"include":[]}'
runs-on: ubuntu-24.04
permissions:
# Required to fetch an OIDC token, used to auth with Google Cloud Platform for @rust/chonky tests
id-token: "write"
# Maintain permission to read repo contents
contents: "read"
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand Down Expand Up @@ -293,6 +316,15 @@ jobs:
rm -rf $temp_dir
echo "PDFIUM_DYNAMIC_LIB_PATH=$(pwd)/${{ matrix.directory }}/libs/" >> $GITHUB_ENV
# Sets GOOGLE_APPLICATION_CREDENTIALS in the environment, to be consumed by gcloud or client libraries
- name: Generate Google Cloud credential configuration
if: matrix.package == '@rust/chonky'
uses: google-github-actions/auth@6fc4af4b145ae7821d527454aa9bd537d1f2dc5f # v2.1.7
with:
project_id: ${{ secrets.GOOGLE_CLOUD_HASH_PROJECT_ID }}
service_account: ${{ secrets.GOOGLE_CLOUD_VERTEX_SERVICE_ACCOUNT }}
workload_identity_provider: ${{ secrets.GOOGLE_CLOUD_IDENTITY_PROVIDER }}

- name: Install playwright
if: matrix.package == '@tests/hash-playwright'
uses: nick-fields/retry@7152eba30c6575329ac0576536151aca5a72780e # v3.0.0
Expand Down Expand Up @@ -350,6 +382,11 @@ jobs:
- name: Run tests
continue-on-error: ${{ steps.tests.outputs.allow-failure == 'true' }}
env:
# Variables needed for chonky tests
GOOGLE_PROJECT_ID: ${{ secrets.GOOGLE_CLOUD_HASH_PROJECT_ID }}
GOOGLE_DEFAULT_CREDENTIALS: ${{ env.GOOGLE_DEFAULT_CREDENTIALS }} # set by google-github-actions/auth
HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
run: |
turbo run test:integration --env-mode=loose --filter "${{ matrix.package }}"
echo "TRIMMED_PACKAGE_NAME=$(echo "${{ matrix.package }}" | sed 's|@||g' | sed 's|/|.|g')" >> $GITHUB_ENV
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ blocks/**/.env
.env.local
.env.*.local

# Generated credentials from google-github-actions/auth
gha-creds-*.json

# macOS directory file
**/.DS_Store

Expand Down
3 changes: 3 additions & 0 deletions .markdownlintignore
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ blocks/**/.env
.env.local
.env.*.local

# Generated credentials from google-github-actions/auth
gha-creds-*.json

# macOS directory file
**/.DS_Store

Expand Down
29 changes: 25 additions & 4 deletions libs/chonky/src/embedding/multi_modal_embedding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,13 @@ pub async fn make_multimodal_api_request(
if !response.status().is_success() {
return Err(
Report::new(ChonkyError::VertexAPI).attach_printable(format!(
"Received the error code {} in the response status",
response.status()
"Received the error code {} in the response status with error text {:?}",
response.status(),
response
.error_for_status()
.change_context(ChonkyError::VertexAPI)?
.text()
.await,
)),
);
}
Expand Down Expand Up @@ -289,6 +294,7 @@ pub fn add_structural_embedding(
#[cfg(test)]
mod tests {
use insta::{assert_binary_snapshot, assert_snapshot};
use serde_json::to_string_pretty;
use tokio::fs;

use super::*;
Expand All @@ -301,7 +307,17 @@ mod tests {
.await
.change_context(ChonkyError::ImageError)?;
// source of truth found by decoding base64 encoding to get same image
assert_binary_snapshot!("page_1.json", base64_json(image_data).to_string().into());
// must use string_pretty since there is autoformating done by compiler with addition of
// newline
assert_binary_snapshot!(
"page_1.json",
format!(
"{}\n",
to_string_pretty(&base64_json(image_data))
.change_context(ChonkyError::ImageError)?
)
.into()
);
Ok(())
}

Expand All @@ -327,8 +343,13 @@ mod tests {
.await
.change_context(ChonkyError::ImageError)?;

//project id

let project_id =
std::env::var("GOOGLE_PROJECT_ID").change_context(ChonkyError::VertexAPI)?;

let test_embedding =
make_multimodal_api_request("hash-embed", Some(image_data), None).await?;
make_multimodal_api_request(&project_id, Some(image_data), None).await?;

//find cosine similarity of vectors

Expand Down

0 comments on commit c999477

Please sign in to comment.