diff --git a/404.html b/404.html
index 0f0ccc9..71b9293 100644
--- a/404.html
+++ b/404.html
@@ -394,6 +394,27 @@
+  • Docker
diff --git a/api/getting-started/index.html b/api/getting-started/index.html
index ba9d0fb..6fcfbda 100644
--- a/api/getting-started/index.html
+++ b/api/getting-started/index.html
@@ -13,7 +13,7 @@
@@ -405,6 +405,27 @@
+  • Docker
diff --git a/api/proxy/index.html b/api/proxy/index.html
index e669d0f..ec50e63 100644
--- a/api/proxy/index.html
+++ b/api/proxy/index.html
@@ -405,6 +405,27 @@
+  • Docker
diff --git a/contributing/index.html b/contributing/index.html
index b8d391c..7ec5627 100644
--- a/contributing/index.html
+++ b/contributing/index.html
@@ -403,6 +403,27 @@
+  • Docker
diff --git a/features/custom-models/index.html b/features/custom-models/index.html
index 6b2989a..7f20a2b 100644
--- a/features/custom-models/index.html
+++ b/features/custom-models/index.html
@@ -461,6 +461,27 @@
+  • Docker
diff --git a/features/custom-playwright/index.html b/features/custom-playwright/index.html
index cb2cd04..5b8327f 100644
--- a/features/custom-playwright/index.html
+++ b/features/custom-playwright/index.html
@@ -470,6 +470,27 @@
+  • Docker
diff --git a/features/docker/index.html b/features/docker/index.html
new file mode 100644
index 0000000..36c126f
--- /dev/null
+++ b/features/docker/index.html
@@ -0,0 +1,800 @@
+Docker - Parsera

    Docker


    Running in Docker

You can access the CLI or a development environment using Docker.

    Prerequisites

The quickstart below relies on Docker, Docker Compose, and Make being installed.

    Quickstart

1. Create a .env file in the project root directory with the following content:

URL=https://parsera.org
FILE=/app/scheme.json
OUTPUT=/app/output/result.json

2. Create a scheme.json file with the parsing scheme in the repository root directory (see the example after this list).

3. Run make up in this directory.

4. The output will be saved to the output/result.json file.
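The parsing scheme format isn't spelled out on this page; judging by the elements dictionaries used elsewhere in these docs and the --scheme example in the CLI usage, a minimal scheme.json might look like this (the field names and descriptions are only illustrative):

{
    "Title": "News title",
    "Points": "Number of points"
}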

    Docker Make Targets

make build     # Build Docker image
make up        # Start containers using Docker Compose
make down      # Stop and remove containers using Docker Compose
make restart   # Restart containers using Docker Compose
make logs      # View logs of the containers
make shell     # Open a shell in the running container
make clean     # Remove all stopped containers, unused networks, and dangling images
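Putting the targets together, a typical quickstart run might look like this (a sketch, assuming the .env and scheme.json files from above are in place):

make build                # build the image once
make up                   # run the extraction via Docker Compose
make logs                 # follow the container output
cat output/result.json    # inspect the extracted data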
diff --git a/features/extractors/index.html b/features/extractors/index.html
index 9280cff..e6008ae 100644
--- a/features/extractors/index.html
+++ b/features/extractors/index.html
@@ -16,7 +16,7 @@
@@ -479,6 +479,27 @@
+  • Docker
diff --git a/features/proxy/index.html b/features/proxy/index.html
index d9a8a49..a097d5b 100644
--- a/features/proxy/index.html
+++ b/features/proxy/index.html
@@ -452,6 +452,27 @@
+  • Docker
diff --git a/getting-started/index.html b/getting-started/index.html
index fb669e5..7516ce3 100644
--- a/getting-started/index.html
+++ b/getting-started/index.html
@@ -334,6 +334,30 @@
+  • Running as a standalone command line tool
@@ -472,6 +496,27 @@
+  • Docker
@@ -625,6 +670,30 @@
+  • Running as a standalone command line tool
@@ -694,6 +763,13 @@

    Basic usage

There is also an arun async method available:

result = await scraper.arun(url=url, elements=elements)
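Since arun is a coroutine, it has to run inside an event loop. A minimal sketch of calling it from a plain script, reusing the url and elements from the example above:

import asyncio

from parsera import Parsera

url = "https://news.ycombinator.com/"
elements = {
    "Title": "News title",
    "Points": "Number of points",
}

async def main() -> None:
    scraper = Parsera()
    # arun is the async counterpart of run
    result = await scraper.arun(url=url, elements=elements)
    print(result)

asyncio.run(main())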
     

    +

    Running as a standalone command line tool

    +

Before running Parsera as a CLI tool, don't forget to set the OPENAI_API_KEY environment variable or put it in a .env file.

    +

    Usage

    +

You can configure the elements to parse using a JSON string or a file. Optionally, you can provide a file to write the output to.

    +
    python -m parsera.main URL {--scheme '{"title":"h1"}' | --file FILENAME} [--output FILENAME]
    +
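For instance, the two modes of the command above might be invoked like this (the URL and scheme values are only illustrative):

# Inline scheme: print the extracted data for the given URL
python -m parsera.main https://news.ycombinator.com/ --scheme '{"Title": "News title"}'

# Scheme read from a file, output written to a file
python -m parsera.main https://news.ycombinator.com/ --file scheme.json --output result.json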

    More features

    Check out further documentation to explore more features:

diff --git a/index.html b/index.html
index f916b54..0e8c87a 100644
--- a/index.html
+++ b/index.html
@@ -461,6 +461,27 @@
+  • Docker
diff --git a/search/search_index.json b/search/search_index.json
index 8fb3563..0cfff19 100644
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@

-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[…]}
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Welcome to Parsera","text":"

    Parsera is a lightweight Python library for scraping websites with LLMs.

    There are 2 ways of using Parsera:

    "},{"location":"#community","title":"Community","text":"

If you like this project, star it on GitHub and join our discussions on the Discord server.

    "},{"location":"#contributors","title":"Contributors","text":"

    If you are considering contributing to Parsera, check out the guidelines to get started.

    "},{"location":"contributing/","title":"Contributing","text":"

Thanks for considering contributing to Parsera! This project is at an early stage of development, so any help is highly appreciated. You can start by looking through existing issues, or ask directly on Discord about the most helpful contributions.

    "},{"location":"contributing/#issues","title":"Issues","text":"

The best way to ask a question, report a bug, or request a feature is to submit an Issue. It's much better than asking over email or Discord, since the conversation becomes publicly available and easy to navigate.

    "},{"location":"contributing/#pull-requests","title":"Pull requests","text":""},{"location":"contributing/#installation-and-setup","title":"Installation and setup","text":"

    Fork the repository on GitHub and clone your fork locally.

    Next, install dependencies using poetry:

# Clone your fork and cd into the repo directory\ngit clone git@github.com:<your username>/parsera.git\ncd parsera\n\n# If you don't have poetry, install it first:\n# https://python-poetry.org/docs/\n# Then:\npoetry install\n# If you are using VS Code you can get the python venv path to switch to:\npoetry env info --path\n# To activate the virtual environment run:\npoetry shell\n
    Now you have a virtual environment with Parsera and all necessary dependencies installed.

    "},{"location":"contributing/#code-style","title":"Code style","text":"

The project uses black and isort for formatting. Set them up in your IDE or run this before committing:

    make format\n

    "},{"location":"contributing/#commit-and-push-changes","title":"Commit and push changes","text":"

Commit your changes and push them to your fork, then create a pull request to the Parsera repository.

    Thanks a lot for helping improve Parsera!

    "},{"location":"getting-started/","title":"Welcome to Parsera","text":"

Parsera is a lightweight Python library for scraping websites with LLMs. You can clone and run it locally, or use the API, which provides a more scalable way to run it and some extra features like a built-in proxy.

    "},{"location":"getting-started/#installation","title":"Installation","text":"
    pip install parsera\nplaywright install\n
    "},{"location":"getting-started/#basic-usage","title":"Basic usage","text":"

If you want to use OpenAI, remember to set the OPENAI_API_KEY env variable. You can do this from Python with:

    import os\n\nos.environ[\"OPENAI_API_KEY\"] = \"YOUR_OPENAI_API_KEY_HERE\"\n

Next, you can run a basic version that uses gpt-4o-mini:

    from parsera import Parsera\n\nurl = \"https://news.ycombinator.com/\"\nelements = {\n    \"Title\": \"News title\",\n    \"Points\": \"Number of points\",\n    \"Comments\": \"Number of comments\",\n}\n\nscraper = Parsera()\nresult = scraper.run(url=url, elements=elements)\n

The result variable will contain JSON with a list of records:

    [\n   {\n      \"Title\":\"Hacking the largest airline and hotel rewards platform (2023)\",\n      \"Points\":\"104\",\n      \"Comments\":\"24\"\n   },\n    ...\n]\n

There is also an arun async method available:

result = await scraper.arun(url=url, elements=elements)\n

    "},{"location":"getting-started/#running-as-a-standalone-command-line-tool","title":"Running as a standalone command line tool","text":"

Before running Parsera as a CLI tool, don't forget to set the OPENAI_API_KEY environment variable or put it in a .env file.

    "},{"location":"getting-started/#usage","title":"Usage","text":"

You can configure the elements to parse using a JSON string or a file. Optionally, you can provide a file to write the output to.

    python -m parsera.main URL {--scheme '{\"title\":\"h1\"}' | --file FILENAME} [--output FILENAME]\n
    "},{"location":"getting-started/#more-features","title":"More features","text":"

    Check out further documentation to explore more features:

    "},{"location":"api/getting-started/","title":"Getting started","text":"

    First, go to Parsera web page and generate an API key.

    Paste this key to X-API-KEY header to send the request:

    curl https://api.parsera.org/v1/extract \\\n--header 'Content-Type: application/json' \\\n--header 'X-API-KEY: <YOUR_API_KEY>' \\\n--data '{\n    \"url\": \"https://news.ycombinator.com/\",\n    \"attributes\": [\n        {\n            \"name\": \"Title\",\n            \"description\": \"News title\"\n        },\n        {\n            \"name\": \"Points\",\n            \"description\": \"Number of points\"\n        }\n    ],\n    \"proxy_country\": \"UnitedStates\"\n}'\n

By default, proxy_country is random. It's recommended to set the proxy_country parameter to a specific country in the request, since a page might not be available from all locations. Here you can find a full list of proxy countries available.

You can also explore the Swagger doc of the API at this link: https://api.parsera.org/docs#/.

    "},{"location":"api/proxy/","title":"Proxy","text":""},{"location":"api/proxy/#setting-proxy-country","title":"Setting proxy country","text":"

You can use the proxy_country parameter to set a proxy country. The default is random, and it's recommended to change it since your page might not be available from all locations.

To scrape the page from the United States, set proxy_country to UnitedStates:

    curl https://api.parsera.org/v1/extract \\\n--header 'Content-Type: application/json' \\\n--header 'X-API-KEY: <YOUR-API-KEY>' \\\n--data '{\n    \"url\": <TARGET-URL>,\n    \"attributes\": [\n        {\n            \"name\": <First attribute name>,\n            \"description\": <First attribute description>,\n        },\n        {\n            \"name\": <Second attribute name>,\n            \"description\": <Second attribute description>\n        }\n    ],\n    \"proxy_country\": \"UnitedStates\"\n}'\n

    "},{"location":"api/proxy/#list-of-proxy-countries","title":"List of proxy countries","text":"

Send a GET request to https://api.parsera.org/v1/proxy-countries to get the list of countries programmatically.

    Here is the list of countries available:

    "},{"location":"features/custom-models/","title":"Custom models","text":""},{"location":"features/custom-models/#run-with-custom-model","title":"Run with custom model","text":"

    You can instantiate Parsera with any chat model supported by LangChain, for example, to run the model from Azure:

import os\nfrom langchain_openai import AzureChatOpenAI\n\nfrom parsera import Parsera\n\nllm = AzureChatOpenAI(\n    azure_endpoint=os.getenv(\"AZURE_GPT_BASE_URL\"),\n    openai_api_version=\"2023-05-15\",\n    deployment_name=os.getenv(\"AZURE_GPT_DEPLOYMENT_NAME\"),\n    openai_api_key=os.getenv(\"AZURE_GPT_API_KEY\"),\n    openai_api_type=\"azure\",\n    temperature=0.0,\n)\n\nurl = \"https://news.ycombinator.com/\"\nelements = {\n    \"Title\": \"News title\",\n    \"Points\": \"Number of points\",\n    \"Comments\": \"Number of comments\",\n}\nscraper = Parsera(model=llm)\nresult = scraper.run(url=url, elements=elements)\n

    "},{"location":"features/custom-models/#run-local-model-with-trasformers","title":"Run local model with Trasformers","text":"

    Currently, we only support models that include a system token

You should install Transformers with either PyTorch (recommended) or TensorFlow 2.0

    Transformers Installation Guide

    Example:

from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM\nfrom parsera.engine.model import HuggingFaceModel\nfrom parsera import Parsera\n\n# Define the URL and elements to scrape\nurl = \"https://news.ycombinator.com/\"\nelements = {\n    \"Title\": \"News title\",\n    \"Points\": \"Number of points\",\n    \"Comments\": \"Number of comments\",\n}\n\n# Initialize model with transformers pipeline\ntokenizer = AutoTokenizer.from_pretrained(\"microsoft/Phi-3-mini-128k-instruct\", trust_remote_code=True)\nmodel = AutoModelForCausalLM.from_pretrained(\"microsoft/Phi-3-mini-128k-instruct\", trust_remote_code=True)\npipe = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=5000)\n\n# Initialize HuggingFaceModel\nllm = HuggingFaceModel(pipeline=pipe)\n\n# Scraper with the HuggingFace model\nscraper = Parsera(model=llm)\nresult = scraper.run(url=url, elements=elements)\n

    "},{"location":"features/custom-playwright/","title":"Custom playwright","text":""},{"location":"features/custom-playwright/#parserascript","title":"ParseraScript","text":"

With the ParseraScript class, you can execute custom Playwright scripts during scraping. There are 2 types of code you can run:

    "},{"location":"features/custom-playwright/#example-log-in-and-load-data","title":"Example: log in and load data","text":"

You can log in to parsera.org and get the credits amount with the following code:

    from playwright.async_api import Page\nfrom parsera import ParseraScript\n\n# Define the script to execute during the session creation\nasync def initial_script(page: Page) -> Page:\n    await page.goto(\"https://parsera.org/auth/sign-in\")\n    await page.wait_for_load_state(\"networkidle\")\n    await page.get_by_label(\"Email\").fill(EMAIL)\n    await page.get_by_label(\"Password\").fill(PASSWORD)\n    await page.get_by_role(\"button\", name=\"Sign In\", exact=True).click()\n    await page.wait_for_selector(\"text=Playground\")\n    return page\n\n# This script is executed after the url is opened\nasync def repeating_script(page: Page) -> Page:\n    await page.wait_for_timeout(1000)  # Wait one second for page to load\n    return page\n\nparsera = ParseraScript(model=model, initial_script=initial_script)\nresult = await parsera.arun(\n    url=\"https://parsera.org/app\",\n    elements={\n        \"credits\": \"number of credits\",\n    },\n    playwright_script=repeating_script,\n)\n

    "},{"location":"features/custom-playwright/#access-playwright-instance","title":"Access Playwright instance","text":"

    The page is fetched via the ParseraScript.loader, which contains the playwright instance.

from parsera import ParseraScript\n\nparsera = ParseraScript(model=model)\n\n## You can manually initialize the playwright session and modify it:\nawait parsera.new_session()\nawait parsera.loader.load_content(url=url)\n\n## After the page is loaded you can access playwright elements, like Page:\nawait parsera.loader.page.get_by_role(\"button\").click()\n\n## Next you can run the extraction process\nresult = await parsera.arun(\n    url=extraction_url,\n    elements=elements_dict,\n)\n

    "},{"location":"features/docker/","title":"Docker","text":""},{"location":"features/docker/#running-in-docker","title":"Running in Docker","text":"

You can access the CLI or a development environment using Docker.

    "},{"location":"features/docker/#prerequisites","title":"Prerequisites","text":""},{"location":"features/docker/#quickstart","title":"Quickstart","text":"
1. Create a .env file in the project root directory with the following content:
URL=https://parsera.org\nFILE=/app/scheme.json\nOUTPUT=/app/output/result.json\n
2. Create a scheme.json file with the parsing scheme in the repository root directory.

3. Run make up in this directory.

4. The output will be saved to the output/result.json file.

    "},{"location":"features/docker/#docker-make-targets","title":"Docker Make Targets","text":"
    make build # Build Docker image\n\nmake up # Start containers using Docker Compose\n\nmake down # Stop and remove containers using Docker Compose\n\nmake restart # Restart containers using Docker Compose\n\nmake logs # View logs of the containers\n\nmake shell # Open a shell in the running container\n\nmake clean # Remove all stopped containers, unused networks, and dangling images\n
    "},{"location":"features/extractors/","title":"Extractors","text":""},{"location":"features/extractors/#different-extractor-types","title":"Different extractor types","text":"

There are different types of extractors that provide output in different formats:

By default, a tabular extractor is used.

    "},{"location":"features/extractors/#tabular-extractor","title":"Tabular extractor","text":"

    from parsera import Parsera\n\nscraper = Parsera(extractor=Parsera.ExtractorType.TABULAR)\n
    The tabular extractor is used to find rows of tabular data and has output of the form:
    [\n    {\"name\": \"name1\", \"price\": \"100\"},\n    {\"name\": \"name2\", \"price\": \"150\"},\n    {\"name\": \"name3\", \"price\": \"300\"},\n]\n

    "},{"location":"features/extractors/#list-extractor","title":"List extractor","text":"

    from parsera import Parsera\n\nscraper = Parsera(extractor=Parsera.ExtractorType.LIST)\n
    The list extractor is used to find lists of different values and has output of the form:
    {\n    \"name\": [\"name1\", \"name2\", \"name3\"],\n    \"price\": [\"100\", \"150\", \"300\"]\n}\n

    "},{"location":"features/extractors/#item-extractor","title":"Item extractor","text":"

    from parsera import Parsera\n\nscraper = Parsera(extractor=Parsera.ExtractorType.ITEM)\n
    The item extractor is used to get singular items from a page like a title or price and has output of the form:
    {\n    \"name\": \"name1\",\n    \"price\": \"100\"\n}\n

    "},{"location":"features/proxy/","title":"Proxy","text":""},{"location":"features/proxy/#using-proxy","title":"Using proxy","text":"

You can serve the traffic via a proxy server when calling the run method:

proxy_settings = {\n    \"server\": \"https://1.2.3.4:5678\",\n    \"username\": <PROXY_USERNAME>,\n    \"password\": <PROXY_PASSWORD>,\n}\nresult = scraper.run(url=url, elements=elements, proxy_settings=proxy_settings)\n

    Where proxy_settings contains your proxy credentials.

    "}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml index 68531f3..7139d8a 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,38 +2,42 @@ https://docs.parsera.org/ - 2024-09-09 + 2024-09-13 https://docs.parsera.org/contributing/ - 2024-09-09 + 2024-09-13 https://docs.parsera.org/getting-started/ - 2024-09-09 + 2024-09-13 https://docs.parsera.org/api/getting-started/ - 2024-09-09 + 2024-09-13 https://docs.parsera.org/api/proxy/ - 2024-09-09 + 2024-09-13 https://docs.parsera.org/features/custom-models/ - 2024-09-09 + 2024-09-13 https://docs.parsera.org/features/custom-playwright/ - 2024-09-09 + 2024-09-13 + + + https://docs.parsera.org/features/docker/ + 2024-09-13 https://docs.parsera.org/features/extractors/ - 2024-09-09 + 2024-09-13 https://docs.parsera.org/features/proxy/ - 2024-09-09 + 2024-09-13 \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index d576aca..066eb2e 100644 Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ