-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
temp commit, just cleaning out my disk
- Loading branch information
1 parent
e6700d8
commit 21aa243
Showing
2 changed files
with
30 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import numpy as np | ||
from sklearn.feature_extraction.text import CountVectorizer | ||
|
||
def sentence_to_random_embedding(sentence, embedding_dim=1024, seed=None):
    """Encode *sentence* as a count-weighted sum of random word vectors.

    Tokenizes the sentence with sklearn's ``CountVectorizer`` (bag of
    words), assigns each vocabulary word a freshly drawn Gaussian vector of
    length ``embedding_dim``, and returns the sum of those vectors weighted
    by each word's count in the sentence.

    Parameters
    ----------
    sentence : str
        Text to encode.
    embedding_dim : int, optional
        Dimensionality of the returned vector (default 1024).
    seed : int or None, optional
        Seed for the random word vectors. Pass an int to make the output
        reproducible; the default ``None`` preserves the original
        behavior, where every call draws new random vectors and the same
        sentence yields a different embedding each time.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(embedding_dim,)``. The zero vector when the
        sentence produces no tokens (empty or punctuation-only input).
    """
    rng = np.random.default_rng(seed)

    vectorizer = CountVectorizer()
    try:
        token_counts = vectorizer.fit_transform([sentence])
    except ValueError:
        # CountVectorizer raises "empty vocabulary" when no valid tokens
        # are found; treat that as an all-zero embedding instead of crashing.
        return np.zeros(embedding_dim)
    vocabulary = vectorizer.get_feature_names_out()

    # One random embedding_dim-dimensional Gaussian vector per vocabulary word.
    word_embeddings = {word: rng.standard_normal(embedding_dim) for word in vocabulary}

    # Sum the word vectors, weighted by how often each word occurs.
    sentence_embedding = np.zeros(embedding_dim)
    for word in vocabulary:
        count = token_counts[0, vectorizer.vocabulary_[word]]
        sentence_embedding += count * word_embeddings[word]

    return sentence_embedding
|
||
# Demonstration: embed one sentence and show the result.
demo_sentence = "This is an example sentence to encode into a 1024-dimensional vector."
vec = sentence_to_random_embedding(demo_sentence)
print(vec)
print("Embedding shape:", vec.shape)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters