From f35954293725f1355acdbfeb2dde6b540b5c585b Mon Sep 17 00:00:00 2001
From: vasu gupta <vasu@parspec.io>
Date: Sun, 22 Oct 2023 22:31:34 +0530
Subject: [PATCH] website update oct 22 - 1

---
 index.html      |  6 ++++--
 index.jemdoc    |  3 ++-
 projects.html   | 27 ++++++++++++++++++++++++++-
 projects.jemdoc | 13 +++++++++++++
 4 files changed, 45 insertions(+), 4 deletions(-)
diff --git a/index.html b/index.html
index 8ad1710..4558263 100644
--- a/index.html
+++ b/index.html
@@ -43,7 +43,9 @@ <h2>Education</h2>
 </ul>
 <h2>Work Experience</h2>
 <ul>
-<li><p>Freelancer, Computer Vision and Deep Learning (2020 - Present)</p>
+<li><p>AI Lead, Parspec, India (Remote) (2021 - 2023)</p>
+</li>
+<li><p>Freelancer, Computer Vision and Deep Learning (2020 - 2021)</p>
 </li>
 <li><p>Senior Engineer, Chipset Architecture Design, Qualcomm, Bangalore, India (2016 - 2018) </p>
 </li>
@@ -56,7 +58,7 @@ <h2>Work Experience</h2>
 </ul>
 <div id="footer">
 <div id="footer-text">
-Page generated 2021-08-28 14:33:35 India Standard Time, by <a href="http://jemdoc.jaboc.net/">jemdoc</a>.
+Page generated 2023-10-22 22:28:58 India Standard Time, by <a href="http://jemdoc.jaboc.net/">jemdoc</a>.
 (<a href="index.jemdoc">source</a>)
 </div>
 </div>
diff --git a/index.jemdoc b/index.jemdoc
index 3781957..25f62f3 100644
--- a/index.jemdoc
+++ b/index.jemdoc
@@ -24,7 +24,8 @@ If you would like to connect or hack something cool together, feel free to reach
 - B.Tech. Electrical Engineering, Indian Institute of Technology, Bhubaneswar, India (2008 - 2012) 
 
 == Work Experience
-- Freelancer, Computer Vision and Deep Learning (2020 - Present)
+- AI Lead, Parspec, India (Remote) (2021 - 2023)
+- Freelancer, Computer Vision and Deep Learning (2020 - 2021)
 - Senior Engineer, Chipset Architecture Design, Qualcomm, Bangalore, India (2016 - 2018) 
 - Application Engineer, Computational Electromagnetics and RF Simulations, CST of America, CA, USA (2015 - 2016)
 - Intern, Computational Electromagnetics and RF Simulations, CST of America, CA, USA (Jun 2014 - Aug 2014)
diff --git a/projects.html b/projects.html
index d9f7dda..9f8ad71 100644
--- a/projects.html
+++ b/projects.html
@@ -20,6 +20,31 @@
 <div id="toptitle">
 <h1>Projects</h1>
 </div>
+<h2>Work Projects at Parspec</h2>
+<ul>
+<li><p><b>Parsing and OCRing of tables in pdf documents</b> - developed custom computer vision algorithms for parsing structure of complex bordered tables (tables with merged rows and columns) with ~98% accuracy on our test set, much better than some of the available solutions we tested. OCRing of text was performed using available open-source and commercial solutions</p>
+</li>
+</ul>
+<ul>
+<li><p><b>Detecting header rows and specific columns in tabular data</b> - trained/fine-tuned BERT based models on custom dataset with &gt;95% accuracy on our test set</p>
+</li>
+</ul>
+<ul>
+<li><p><b>Extracting family/model names from lighting product datasheets</b> - developed a custom annotation tool for faster, more efficient annotation; trained NER models on our custom dataset with &gt;90% accuracy on test set</p>
+</li>
+</ul>
+<ul>
+<li><p><b>Submittal Aggregator</b> - developed custom algorithms for matching rows in table of contents in submittal pdf documents to corresponding pages in the document.</p>
+</li>
+</ul>
+<ul>
+<li><p><b>Product Recommendation System</b> - developed and implemented a multi-stage pipeline involving word classification, next word prediction and character-token based NER for extracting important technical specs from product datasheets for recommending similar products. Also, evaluated other heuristics and sentence vector-similarity based approaches for product recommendation.</p>
+</li>
+</ul>
+<ul>
+<li><p><b>Distributed scalable pipeline for family name extraction and recommendation system</b> - developed and implemented a custom end-to-end pipeline involving document download, parsing and model inference for running on a distributed set of CPUs and GPUs. Motivation was to reduce the number of GPUs required (reduced from &gt;20 with a naive approach to 1 GPU with a dynamic scaling option) and allow full customizability and easy debugging.</p>
+</li>
+</ul>
 <h2>Consulting Projects </h2>
 <ul>
 <li><p><b>Analysed and implemented algorithms for estimation of melanin and hemoglobin distribution in face regions from
@@ -139,7 +164,7 @@ <h2>Signal Processing and Wireless Communications Projects</h2>
 </ul>
 <div id="footer">
 <div id="footer-text">
-Page generated 2021-12-02 13:31:04 India Standard Time, by <a href="http://jemdoc.jaboc.net/">jemdoc</a>.
+Page generated 2023-10-22 22:30:09 India Standard Time, by <a href="http://jemdoc.jaboc.net/">jemdoc</a>.
 (<a href="projects.jemdoc">source</a>)
 </div>
 </div>
diff --git a/projects.jemdoc b/projects.jemdoc
index 43b823e..9910cb7 100644
--- a/projects.jemdoc
+++ b/projects.jemdoc
@@ -1,6 +1,19 @@
 # jemdoc: menu{MENU}{index.html}, showsource
 = Projects
 
+== Work Projects at Parspec
+- *Parsing and OCRing of tables in pdf documents* - developed custom computer vision algorithms for parsing structure of complex bordered tables (tables with merged rows and columns) with \~98% accuracy on our test set, much better than some of the available solutions we tested. OCRing of text was performed using available open-source and commercial solutions
+
+- *Detecting header rows and specific columns in tabular data* - trained/fine-tuned BERT based models on custom dataset with >95% accuracy on our test set
+
+- *Extracting family/model names from lighting product datasheets* - developed a custom annotation tool for faster, more efficient annotation; trained NER models on our custom dataset with >90% accuracy on test set
+
+- *Submittal Aggregator* - developed custom algorithms for matching rows in table of contents in submittal pdf documents to corresponding pages in the document.
+
+- *Product Recommendation System* - developed and implemented a multi-stage pipeline involving word classification, next word prediction and character-token based NER for extracting important technical specs from product datasheets for recommending similar products. Also, evaluated other heuristics and sentence vector-similarity based approaches for product recommendation.
+
+- *Distributed scalable pipeline for family name extraction and recommendation system* - developed and implemented a custom end-to-end pipeline involving document download, parsing and model inference for running on a distributed set of CPUs and GPUs. Motivation was to reduce the number of GPUs required (reduced from >20 with a naive approach to 1 GPU with a dynamic scaling option) and allow full customizability and easy debugging.
+
 == Consulting Projects 
 - *Analysed and implemented algorithms for estimation of melanin and hemoglobin distribution in face regions from
 images captured using mobile selfie camera for a Canada based healthcare startup.* \n