diff --git a/radish/README.md b/radish/README.md
new file mode 100644
index 0000000..c2f7685
--- /dev/null
+++ b/radish/README.md
@@ -0,0 +1,2 @@
+* Input data is in /shared/patents/bigrams-nfs/ngrams
+* raco must output JSON+splits file input
diff --git a/radish/catalog.py b/radish/catalog.py
new file mode 100644
index 0000000..75cf8c3
--- /dev/null
+++ b/radish/catalog.py
@@ -0,0 +1,6 @@
+# Schemas corresponding to Myrial examples
+
+{
+    'public:adhoc:ngrams' : [('word', 'STRING_TYPE'), ('filename','STRING_TYPE')],
+    'public:adhoc:tfidf' : [('term', 'STRING_TYPE'), ('document', 'STRING_TYPE'), ('tfidf', 'LONG_TYPE')],
+}
diff --git a/radish/tfidf.myl b/radish/tfidf.myl
new file mode 100644
index 0000000..6ba47c0
--- /dev/null
+++ b/radish/tfidf.myl
@@ -0,0 +1,48 @@
+
+
+
+
+-- Input: the ngrams relation; radish/catalog.py lists its columns as
+-- (word, filename). Each bigram (word) is treated as a term, each file as a document.
+
+bigrams = scan(ngrams);
+
+-- Frequency(t, d): rows of bigrams per (word, filename); the plain columns form the group key.
+freq = select bigrams.word        as term,
+              bigrams.filename    as document,
+              count(bigrams.word) as freq
+       from bigrams;
+
+-- MaxFrequency(d) = max over terms w in d of Frequency(w, d); grouped by document.
+maxfreq = select freq.document  as document,
+                 max(freq.freq) as maxfreq
+          from freq;
+
+-- Term frequency tf(t, d) = 0.5 + 0.5 * Frequency(t, d) / MaxFrequency(d).
+tf = select freq.term                               as term,
+            freq.document                           as document,
+            0.5 + 0.5 * freq.freq / maxfreq.maxfreq as tf
+     from freq, maxfreq
+     where freq.document = maxfreq.document;  -- pair each row with its document's max
+
+-- Document frequency: the number of documents each term appears in.
+-- Count over freq, which has one row per (term, document); counting rows of bigrams
+-- would give total occurrences instead. (N, the corpus size, is hardcoded to 300 in idf.)
+invfreq = select freq.term as term,
+                 COUNT(freq.document) as numdocs
+    from freq;  -- groups by term
+
+-- idf(t) = log(N / numdocs(t)), with N fixed at 300 documents.
+-- Open question from the original note ("how many?") -- presumably the real document count.
+idf = select invfreq.term                      as term,
+             log(FLOAT(300) / invfreq.numdocs) as idf
+      from invfreq;
+
+
+tfidf = select tf.term         as term,      -- final score per (term, document)
+               tf.document     as document,
+               tf.tf * idf.idf as tfidf      -- tf(t, d) * idf(t)
+        from tf, idf
+        where tf.term = idf.term;
+
+store(tfidf, tfidf);  -- save the tfidf result as relation tfidf