
Nodejs example embed text not working #6

Open
Tayomide opened this issue Oct 12, 2023 · 2 comments

Comments

@Tayomide

First off, thank you so much for the functions!

I am currently trying to embed text using the Node.js example, but the variable textTokenizer isn't instantiated anywhere. How would I go about instantiating it so I can get the text embeddings?

Thank you!

@josephrocca
Owner

You should be able to just copy code from here: https://github.com/josephrocca/openai-clip-js/blob/main/onnx-text-demo.html
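For reference, the part of that demo you need is just instantiating the tokenizer and encoding the text before feeding the tokens to the text model. A minimal sketch (the exact tokenizer import is in the demo source; the other names match the demo and the code further down this thread):

const textTokenizer = new Tokenizer(); // Tokenizer class from the clip-bpe module the demo imports
let textTokens = textTokenizer.encodeForCLIP("a photo of a cat"); // 77 token ids, padded/truncated
textTokens = Int32Array.from(textTokens);
const feeds = { input: new ort.Tensor('int32', textTokens, [1, 77]) };
const results = await onnxTextSession.run(feeds);
const textEmbedding = [...results["output"].data];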

@Tayomide
Author

Tayomide commented Oct 13, 2023

Thank you! I am currently building a Node server and I haven't figured out how to import a module from an HTTPS URL.

Luckily there is an npm package that does exactly the same thing! Thank you so much for making this open source; there's no way I would have figured the code out on my own.

Here's what I am currently using, in case anyone runs into a similar issue (sadly, the server that uses this code can't be open-sourced). The dependencies are the canvas, onnxruntime-web, and clip-bpe-js packages from npm.

// clipService.js
const { createCanvas, loadImage } = require('canvas');
const ort = require('onnxruntime-web');
const Tokenizer = require('clip-bpe-js').default;

// Run the WASM backend single-threaded
ort.env.wasm.numThreads = 1;

let onnxImageSession;
let onnxTextSession;
const textTokenizer = new Tokenizer();

// Create ONNX Runtime sessions for the CLIP ViT-B/32 image and text models hosted on Hugging Face
async function initialize() {
  console.log("Loading clip model...");
  onnxImageSession = await ort.InferenceSession.create("https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-image-vit-32-float32.onnx", { executionProviders: ["wasm"] });
  onnxTextSession = await ort.InferenceSession.create("https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-text-vit-32-float32-int32.onnx", { executionProviders: ["wasm"] });
  console.log("Loaded. Now running inference...");
}

// Preprocess the image into a [1, 3, 224, 224] float32 tensor and run it through the image model
async function embedImage(url) {
  let rgbData = await getRgbData(url);

  const feeds = {'input': new ort.Tensor('float32', rgbData, [1,3,224,224])};

  let t = Date.now();
  console.log("Running inference...");
  const results = await onnxImageSession.run(feeds);
  console.log(`Finished inference in ${Date.now()-t}ms`);

  const data = results["output"].data;

  return data;
}

// Tokenize the text to 77 int32 token ids and run them through the text model
async function embedText(text) {
  let textTokens = textTokenizer.encodeForCLIP(text);
  textTokens = Int32Array.from(textTokens);
  const feeds = {input: new ort.Tensor('int32', textTokens, [1, 77])};
  const results = await onnxTextSession.run(feeds);
  return [...results["output"].data];
}

async function getRgbData(imgUrl) {
  // Load the image, resize it to 224x224, and convert it to normalized CHW float32 data
  let img = await loadImage(imgUrl);
  let canvas = createCanvas(224, 224);
  let ctx = canvas.getContext("2d");
  ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
  let imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);

  let rgbData = [[], [], []]; // [r, g, b]
  // remove alpha and put into correct shape:
  let d = imageData.data;
  for(let i = 0; i < d.length; i += 4) { 
    let x = (i/4) % canvas.width;
    let y = Math.floor((i/4) / canvas.width);
    if(!rgbData[0][y]) rgbData[0][y] = [];
    if(!rgbData[1][y]) rgbData[1][y] = [];
    if(!rgbData[2][y]) rgbData[2][y] = [];
    rgbData[0][y][x] = d[i+0]/255;
    rgbData[1][y][x] = d[i+1]/255;
    rgbData[2][y][x] = d[i+2]/255;
    // From CLIP repo: Normalize(mean=(0.48145466, 0.4578275, 0.40821073), std=(0.26862954, 0.26130258, 0.27577711))
    rgbData[0][y][x] = (rgbData[0][y][x] - 0.48145466) / 0.26862954;
    rgbData[1][y][x] = (rgbData[1][y][x] - 0.4578275) / 0.26130258;
    rgbData[2][y][x] = (rgbData[2][y][x] - 0.40821073) / 0.27577711;
  }
  rgbData = Float32Array.from(rgbData.flat().flat());
  return rgbData;
}

module.exports = {
  initialize,
  embedImage,
  embedText,
  getRgbData,
};

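And a minimal usage sketch of the module above, assuming it's saved as clipService.js (the cosine-similarity helper and the image URL are just for illustration):

// usageExample.js — minimal sketch, assuming the module above is saved as clipService.js
const clip = require('./clipService');

// Cosine similarity between two embedding vectors (illustrative helper, not part of the module)
function cosineSimilarity(a, b) {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

(async () => {
  await clip.initialize(); // must complete before any embed* call

  const textEmbedding = await clip.embedText("a photo of a dog");
  const imageEmbedding = await clip.embedImage("https://example.com/dog.jpg"); // hypothetical image URL

  console.log("similarity:", cosineSimilarity([...imageEmbedding], textEmbedding));
})();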