From 105bb1c14e24d28516b4898ef25acb0d400671a5 Mon Sep 17 00:00:00 2001
From: richelbilderbeek
Date: Mon, 9 Sep 2024 08:45:56 +0200
Subject: [PATCH] Add Continuous Integration script

---
 .github/workflows/check_build.yml              | 24 ++++++++++++++++++++
 .../example_huggingface_newer_than_v4_28.py    | 19 +++++++++++++++
 .../workflows/example_huggingface_v4_28.py     | 17 ++++++++++++++
 3 files changed, 60 insertions(+)
 create mode 100644 .github/workflows/check_build.yml
 create mode 100644 .github/workflows/example_huggingface_newer_than_v4_28.py
 create mode 100644 .github/workflows/example_huggingface_v4_28.py

diff --git a/.github/workflows/check_build.yml b/.github/workflows/check_build.yml
new file mode 100644
index 0000000..ef85bf0
--- /dev/null
+++ b/.github/workflows/check_build.yml
@@ -0,0 +1,24 @@
+# Checks if the build works,
+# by installing the requirements
+# and then running the example code
+
+name: Check build
+
+on:
+  push:
+    branches:
+      - main
+jobs:
+  check_build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+
+      - name: install required packages
+        run: python3 -m pip install -r requirements.txt
+
+      - name: run example code shown in README
+        run: python3 .github/workflows/example_huggingface_v4_28.py
diff --git a/.github/workflows/example_huggingface_newer_than_v4_28.py b/.github/workflows/example_huggingface_newer_than_v4_28.py
new file mode 100644
index 0000000..b3ece64
--- /dev/null
+++ b/.github/workflows/example_huggingface_newer_than_v4_28.py
@@ -0,0 +1,19 @@
+import torch
+from transformers import AutoTokenizer, AutoModel
+from transformers.models.bert.configuration_bert import BertConfig
+
+tokenizer = AutoTokenizer.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True)
+config = BertConfig.from_pretrained("zhihan1996/DNABERT-2-117M")
+model = AutoModel.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True, config=config)
+
+dna = "ACGTAGCATCGGATCTATCTATCGACACTTGGTTATCGATCTACGAGCATCTCGTTAGC"
+inputs = tokenizer(dna, return_tensors = 'pt')["input_ids"]
+hidden_states = model(inputs)[0] # [1, sequence_length, 768]
+
+# embedding with mean pooling
+embedding_mean = torch.mean(hidden_states[0], dim=0)
+print(embedding_mean.shape) # expect to be 768
+
+# embedding with max pooling
+embedding_max = torch.max(hidden_states[0], dim=0)[0]
+print(embedding_max.shape) # expect to be 768
\ No newline at end of file
diff --git a/.github/workflows/example_huggingface_v4_28.py b/.github/workflows/example_huggingface_v4_28.py
new file mode 100644
index 0000000..e7040f4
--- /dev/null
+++ b/.github/workflows/example_huggingface_v4_28.py
@@ -0,0 +1,17 @@
+import torch
+from transformers import AutoTokenizer, AutoModel
+
+tokenizer = AutoTokenizer.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True)
+model = AutoModel.from_pretrained("zhihan1996/DNABERT-2-117M", trust_remote_code=True)
+
+dna = "ACGTAGCATCGGATCTATCTATCGACACTTGGTTATCGATCTACGAGCATCTCGTTAGC"
+inputs = tokenizer(dna, return_tensors = 'pt')["input_ids"]
+hidden_states = model(inputs)[0] # [1, sequence_length, 768]
+
+# embedding with mean pooling
+embedding_mean = torch.mean(hidden_states[0], dim=0)
+print(embedding_mean.shape) # expect to be 768
+
+# embedding with max pooling
+embedding_max = torch.max(hidden_states[0], dim=0)[0]
+print(embedding_max.shape) # expect to be 768
\ No newline at end of file