diff --git a/examples/files/CV_Candidate.pdf b/examples/files/CV_Candidate.pdf
new file mode 100644
index 0000000..2578c85
Binary files /dev/null and b/examples/files/CV_Candidate.pdf differ
diff --git a/examples/files/Job_Offer.pdf b/examples/files/Job_Offer.pdf
new file mode 100644
index 0000000..b316cc2
Binary files /dev/null and b/examples/files/Job_Offer.pdf differ
diff --git a/examples/resume_processor.py b/examples/resume_processor.py
new file mode 100644
index 0000000..3625203
--- /dev/null
+++ b/examples/resume_processor.py
@@ -0,0 +1,157 @@
+import json
+import os
+from typing import List, Optional
+
+from dotenv import load_dotenv
+from pydantic import Field
+import yaml
+
+from extract_thinker import Extractor, Contract, DocumentLoaderPyPdf
+from litellm import Router
+
+from extract_thinker.llm import LLM
+
+
+def json_to_yaml(json_dict):
+    # Check if json_dict is a dictionary
+    if not isinstance(json_dict, dict):
+        raise ValueError("json_dict must be a dictionary")
+
+    # Convert the Python dictionary to YAML
+    yaml_str = yaml.dump(json_dict)
+
+    return yaml_str
+
+
+class RoleContract(Contract):
+    company_name: str = Field(description="Company name")
+    years_of_experience: int = Field(description="Years of experience required. If not mentioned, calculate it from the start and end dates")
+    is_remote: bool = Field(description="Is the role remote?")
+    country: str = Field(description="Country of the role")
+    city: Optional[str] = Field(description="City of the role")
+    list_of_skills: List[str] = Field(description="""
+        List of skill strings, e.g. ["5 years experience", "3 years in React", "Typescript"].
+        Phrase each skill as a yes/no item so the LLM can later map it to a list of true/false values.
+        """)
+
+
+class ResumeContract(Contract):
+    name: str = Field(description="First and last name")
+    age: Optional[str] = Field(description="Age, given as a date of birth in DD/MM/YYYY format. Empty if not available")
+    email: str = Field(description="Email address")
+    phone: Optional[str] = Field(description="Phone number")
+    address: Optional[str] = Field(description="Address")
+    city: Optional[str] = Field(description="City")
+    total_experience: int = Field(description="Total experience in years")
+    can_go_to_office: Optional[bool] = Field(description="Can go to the office. False if the candidate's city/location is not provided; true if it is the same city as the role")
+    list_of_skills: List[bool] = Field(description="""Takes the job's list of skills and returns a list of true/false values indicating whether the candidate has each skill, e.g.
+        ['Python', 'JavaScript', 'React', 'Node.js'] -> [True, True, False, True]""")
+
+
+class Person(Contract):
+    name: str = Field(description="First and last name")
+    list_of_skills: List[str]
+
+
+load_dotenv()
+cwd = os.getcwd()
+
+
+def config_router():
+    rpm = 5000  # Rate limit in requests per minute
+
+    model_list = [
+        {
+            "model_name": "Meta-Llama-3-8B-Instruct",
+            "litellm_params": {
+                "model": "deepinfra/meta-llama/Meta-Llama-3-8B-Instruct",
+                "api_key": os.getenv("DEEPINFRA_API_KEY"),
+                "rpm": rpm,
+            },
+        },
+        {
+            "model_name": "Mistral-7B-Instruct-v0.2",
+            "litellm_params": {
+                "model": "deepinfra/mistralai/Mistral-7B-Instruct-v0.2",
+                "api_key": os.getenv("DEEPINFRA_API_KEY"),
+                "rpm": rpm,
+            },
+        },
+        {
+            "model_name": "groq-llama3-8b-8192",
+            "litellm_params": {
+                "model": "groq/llama3-8b-8192",
+                "api_key": os.getenv("GROQ_API_KEY"),
+                "rpm": rpm,
+            },
+        },
+    ]
+
+    # Adding fallback models
+    fallback_models = [
+        {
+            "model_name": "claude-3-haiku-20240307",
+            "litellm_params": {
+                "model": "claude-3-haiku-20240307",
+                "api_key": os.getenv("CLAUDE_API_KEY"),
+            },
+        },
+        {
+            "model_name": "azure-deployment",
+            "litellm_params": {
+                "model": "azure/",
+                "api_base": os.getenv("AZURE_API_BASE"),
+                "api_key": os.getenv("AZURE_API_KEY"),
+                "rpm": 1440,
+            },
+        },
+    ]
+
+    # Combine the lists
+    model_list.extend(fallback_models)
+
+    # Define the router configuration (fallbacks reference model_name entries)
+    router = Router(
+        model_list=model_list,
+        default_fallbacks=["claude-3-haiku-20240307", "azure-deployment"],
+        context_window_fallbacks=[
+            {"Meta-Llama-3-8B-Instruct": ["claude-3-haiku-20240307"]},
+            {"groq-llama3-8b-8192": ["claude-3-haiku-20240307"]},
+            {"Mistral-7B-Instruct-v0.2": ["claude-3-haiku-20240307"]},
+        ],
+        set_verbose=True
+    )
+
+    return router
+
+
+job_role_path = os.path.join(cwd, "examples", "files", "Job_Offer.pdf")
+
+extractor_job_role = Extractor()
+
+extractor_job_role.load_document_loader(
+    DocumentLoaderPyPdf()
+)
+
+extractor_job_role.load_llm("gpt-4o")
+role_result = extractor_job_role.extract(job_role_path, RoleContract)
+
+print(role_result.json())
+
+extractor_candidate = Extractor()
+extractor_candidate.load_document_loader(
+    DocumentLoaderPyPdf()
+)
+
+llm = LLM("groq/llama3-8b-8192")  # default model
+# llm.load_router(config_router())  # load the router
+
+extractor_candidate.load_llm(llm)
+
+resume_content_path = os.path.join(cwd, "examples", "files", "CV_Candidate.pdf")
+
+job_role_content = ("This is the job content to be mapped:\n"
+                    + json_to_yaml(json.loads(role_result.json())))
+
+result = extractor_candidate.extract(resume_content_path,
+                                     ResumeContract,
+                                     content=job_role_content)
+
+print(result.json())
diff --git a/extract_thinker/__init__.py b/extract_thinker/__init__.py
index 59db2e8..c14ad4c 100644
--- a/extract_thinker/__init__.py
+++ b/extract_thinker/__init__.py
@@ -3,6 +3,7 @@
 from .document_loader.cached_document_loader import CachedDocumentLoader
 from .document_loader.document_loader_tesseract import DocumentLoaderTesseract
 from .document_loader.document_loader_spreadsheet import DocumentLoaderSpreadSheet
+from .document_loader.document_loader_pypdf import DocumentLoaderPyPdf
 from .document_loader.document_loader_text import DocumentLoaderText
 from .models import classification, classification_response
 from .process import Process
@@ -17,6 +18,7 @@
     'DocumentLoader',
     'CachedDocumentLoader',
     'DocumentLoaderTesseract',
+    'DocumentLoaderPyPdf',
     'DocumentLoaderText',
     'classification',
     'classification_response',
diff --git a/extract_thinker/document_loader/document_loader_llm_image.py b/extract_thinker/document_loader/document_loader_llm_image.py
new file mode 100644
index 0000000..ed75c30
--- /dev/null
+++ b/extract_thinker/document_loader/document_loader_llm_image.py
@@ -0,0 +1,54 @@
+from abc import ABC
+from io import BytesIO
+from PIL import Image
+from extract_thinker.document_loader.cached_document_loader import CachedDocumentLoader
+from extract_thinker.utils import extract_json
+
+
+class DocumentLoaderLLMImage(CachedDocumentLoader, ABC):
+    def __init__(self, content=None, cache_ttl=300, llm=None):
+        super().__init__(content, cache_ttl)
+        self.llm = llm
+
+    def extract_image_content(self, image_stream: BytesIO) -> str:
+        """
+        Extracts text or data from an image using an LLM.
+        The actual implementation uses an LLM to process the image content.
+        """
+        # Load the image from the stream
+        image = Image.open(image_stream)
+
+        # Encode the image to base64
+        base64_image = self.encode_image(image)
+
+        # Use the LLM to extract the content from the image
+        resp = self.llm.completion(
+            model="claude-3-sonnet-20240229",
+            messages=[
+                {
+                    "role": "system",
+                    "content": "You are a world-class image data extractor. You receive an image and extract useful information from it. "
+                               "You output a JSON with the extracted information.",
+                },
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": "data:image/jpeg;base64," + base64_image
+                            },
+                        },
+                        {"type": "text", "text": "###JSON Output\n"},
+                    ],
+                },
+            ],
+        )
+
+        # Extract the JSON text from the response
+        json_text = resp.choices[0].message.content
+
+        # Extract the JSON object from the text
+        json_text = extract_json(json_text)
+
+        # Return the extracted content
+        return json_text
diff --git a/extract_thinker/document_loader/document_loader_pypdf.py b/extract_thinker/document_loader/document_loader_pypdf.py
new file mode 100644
index 0000000..d51255f
--- /dev/null
+++ b/extract_thinker/document_loader/document_loader_pypdf.py
@@ -0,0 +1,41 @@
+import io
+from typing import Any, Dict, List, Union
+from PyPDF2 import PdfReader
+from extract_thinker.document_loader.document_loader_llm_image import DocumentLoaderLLMImage
+
+
+class DocumentLoaderPyPdf(DocumentLoaderLLMImage):
+    def __init__(self, content: Any = None, cache_ttl: int = 300):
+        super().__init__(content, cache_ttl)
+
+    def load_content_from_file(self, file_path: str) -> Union[str, Dict[str, Any]]:
+        reader = PdfReader(file_path)
+        return self.extract_data_from_pdf(reader)
+
+    def load_content_from_stream(self, stream: io.BytesIO) -> Union[str, Dict[str, Any]]:
+        reader = PdfReader(stream)
+        return self.extract_data_from_pdf(reader)
+
+    def load_content_from_file_list(self, file_paths: List[str]) -> List[Any]:
+        return [self.load_content_from_file(file_path) for file_path in file_paths]
+
+    def load_content_from_stream_list(self, streams: List[io.BytesIO]) -> List[Any]:
+        return [self.load_content_from_stream(stream) for stream in streams]
+
+    def extract_data_from_pdf(self, reader: PdfReader) -> Union[str, Dict[str, Any]]:
+        document_data = {
+            "text": []
+        }
+
+        for page in reader.pages:
+            # Extract text and split it into lines
+            page_text = page.extract_text()
+            document_data["text"].extend(page_text.split('\n'))
+
+            # TODO: image extraction is skipped for now.
+            # for img_index, image in enumerate(page.images):
+            #     image_data = self.extract_image_content(io.BytesIO(image["data"]))
+            #     if image_data:
+            #         document_data["images"].append(image_data)
+
+        return document_data
diff --git a/extract_thinker/extractor.py b/extract_thinker/extractor.py
index 07ffeae..81dac12 100644
--- a/extract_thinker/extractor.py
+++ b/extract_thinker/extractor.py
@@ -13,7 +13,8 @@
 from extract_thinker.document_loader.loader_interceptor import LoaderInterceptor
 from extract_thinker.document_loader.llm_interceptor import LlmInterceptor
 
-from extract_thinker.utils import get_file_extension
+from extract_thinker.utils import get_file_extension, encode_image
+import yaml
 
 SUPPORTED_IMAGE_FORMATS = ["jpeg", "png", "bmp", "tiff"]
@@ -30,6 +31,7 @@ def __init__(
         self.document_loaders_by_file_type: Dict[str, DocumentLoader] = {}
         self.loader_interceptors: List[LoaderInterceptor] = []
         self.llm_interceptors: List[LlmInterceptor] = []
+        self.extra_content: Optional[str] = None
 
     def add_interceptor(
         self, interceptor: Union[LoaderInterceptor, LlmInterceptor]
@@ -55,10 +57,17 @@ def get_document_loader_for_file(self, file: str) -> DocumentLoader:
     def load_document_loader(self, document_loader: DocumentLoader) -> None:
         self.document_loader = document_loader
 
-    def load_llm(self, model: str) -> None:
-        self.llm = LLM(model)
+    def load_llm(self, model: Optional[Union[str, LLM]] = None) -> None:
+        if isinstance(model, LLM):
+            self.llm = model
+        elif model is not None:
+            self.llm = LLM(model)
+        else:
+            raise ValueError("Either a model string or an LLM object must be provided.")
+
+    def extract(self, source: Union[str, IO, list], response_model: type[BaseModel], vision: bool = False, content: Optional[str] = None) -> Any:
+        self.extra_content = content
 
-    def extract(self, source: Union[str, IO, list], response_model: type[BaseModel], vision: bool = False) -> str:
         if not issubclass(response_model, BaseModel):
             raise ValueError("response_model must be a subclass of Pydantic's BaseModel.")
@@ -71,7 +80,7 @@ def extract(self, source: Union[str, IO, list], response_model: type[BaseModel],
         else:
             raise ValueError("Source must be a file path, a stream, or a list of dictionaries")
 
-    async def extract_async(self, source: Union[str, IO, list], response_model: type[BaseModel], vision: bool = False) -> str:
+    async def extract_async(self, source: Union[str, IO, list], response_model: type[BaseModel], vision: bool = False) -> Any:
         return await asyncio.to_thread(self.extract, source, response_model, vision)
 
     def extract_from_list(self, data: List[Dict[Any, Any]], response_model: type[BaseModel], vision: bool) -> str:
@@ -162,9 +171,13 @@ def classify(self, input: Union[str, IO], classifications: List[Classification])
     async def classify_async(self, input: Union[str, IO], classifications: List[Classification]):
         return await asyncio.to_thread(self.classify, input, classifications)
 
-    def _extract(
-        self, content, file_or_stream, response_model, vision=False, is_stream=False
-    ):
+    def _extract(self,
+                 content,
+                 file_or_stream,
+                 response_model,
+                 vision=False,
+                 is_stream=False
+                 ):
         # call all the llm interceptors before calling the llm
         for interceptor in self.llm_interceptors:
             interceptor.intercept(self.llm)
@@ -177,8 +190,18 @@ def _extract(
             },
         ]
 
+        if self.extra_content is not None:
+            if isinstance(self.extra_content, dict):
+                self.extra_content = yaml.dump(self.extra_content)
+            messages.append({"role": "user", "content": "##Extra Content\n\n" + self.extra_content})
+
+        if content is not None:
+            if isinstance(content, dict):
+                content = yaml.dump(content)
+            messages.append({"role": "user", "content": "##Content\n\n" + content})
+
         if vision:
-            base64_encoded_image = self._encode_image_to_base64(
+            base64_encoded_image = encode_image(
                 file_or_stream, is_stream
             )
@@ -196,8 +219,6 @@ def _extract(
                     ],
                 }
             ]
-        else:
-            messages.append({"role": "user", "content": "##Content\n\n" + content})
 
         response = self.llm.request(messages, response_model)
         return response
diff --git a/extract_thinker/llm.py b/extract_thinker/llm.py
index f6470df..0278696 100644
--- a/extract_thinker/llm.py
+++ b/extract_thinker/llm.py
@@ -1,21 +1,37 @@
+from typing import List, Dict, Any
 import instructor
 import litellm
 from extract_thinker.utils import num_tokens_from_string
+from litellm import Router
 
 
 class LLM:
-    def __init__(self, model):
-        self.client = instructor.from_litellm(litellm.completion)
+    def __init__(self, model: str):
+        self.client = instructor.from_litellm(litellm.completion, mode=instructor.Mode.MD_JSON)
         self.model = model
+        self.router = None
 
-    def request(self, messages, response_model):
+    def load_router(self, router: Router) -> None:
+        self.router = router
+
+    def request(self, messages: List[Dict[str, str]], response_model: Any) -> Any:
         contents = map(lambda message: message['content'], messages)
         all_contents = ' '.join(contents)
-        return self.client.chat.completions.create(
-            model=self.model,
-            max_tokens=num_tokens_from_string(all_contents),
-            messages=messages,
-            response_model=response_model,
-        )
+        max_tokens = num_tokens_from_string(all_contents)
+
+        if self.router:
+            response = self.router.completion(
+                model=self.model,
+                max_tokens=max_tokens,
+                messages=messages,
+                response_model=response_model,
+            )
+        else:
+            response = self.client.chat.completions.create(
+                model=self.model,
+                max_tokens=max_tokens,
+                messages=messages,
+                response_model=response_model
+            )
+
+        return response
diff --git a/poetry.lock b/poetry.lock
index 1aeaf63..9a7cc0b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1469,6 +1469,27 @@ files = [
 plugins = ["importlib-metadata"]
 windows-terminal = ["colorama (>=0.4.6)"]
 
+[[package]]
+name = "pypdf2"
+version = "3.0.1"
+description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440"},
+    {file = "pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928"},
+]
+
+[package.dependencies]
+typing_extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
+
+[package.extras]
+crypto = ["PyCryptodome"]
+dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "wheel"]
+docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"]
+full = ["Pillow", "PyCryptodome"]
+image = ["Pillow"]
+
 [[package]]
 name = "pypdfium2"
 version = "4.29.0"
@@ -2176,4 +2197,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "60b4b4fa08db9ddfeae90cc5b04b979871883db42c2d09b274923934f8a3eb9a"
+content-hash = "047a0bc6e650003696373a331d62a070eb264f898d3613222f61649ea9853a12"
diff --git a/pyproject.toml b/pyproject.toml
index 6a1bc83..3ac71e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "extract_thinker"
-version = "0.0.2"
+version = "0.0.3"
 description = "Library to extract data from files and documents agnostically using LLMs"
 authors = ["Júlio Almeida "]
 readme = "README.md"
@@ -20,6 +20,7 @@ cachetools = "^5.3.3"
 pyyaml = "^6.0.1"
 tiktoken = "^0.6.0"
 openpyxl = "^3.1.2"
+pypdf2 = "^3.0.1"
 
 [tool.poetry.dev-dependencies]
 flake8 = "^3.9.2"
diff --git a/tests/classify.py b/tests/classify.py
index 53c8fe9..a5ae394 100644
--- a/tests/classify.py
+++ b/tests/classify.py
@@ -23,7 +23,7 @@ def test_classify_feature():
     extractor = Extractor()
     extractor.load_document_loader(DocumentLoaderTesseract(tesseract_path))
-    extractor.load_llm("claude-3-haiku-20240307")
+    extractor.load_llm("gpt-3.5-turbo")
 
     # Act
     result = extractor.classify_from_path(test_file_path, Classifications)
@@ -65,4 +65,4 @@ def test_classify():
     # Assert
     assert result is not None
     assert isinstance(result, ClassificationResponse)
-    assert result.name == "Invoice"
\ No newline at end of file
+    assert result.name == "Invoice"
diff --git a/tests/document_loader_pypdf.py b/tests/document_loader_pypdf.py
new file mode 100644
index 0000000..39562c5
--- /dev/null
+++ b/tests/document_loader_pypdf.py
@@ -0,0 +1,23 @@
+import os
+from dotenv import load_dotenv
+from extract_thinker.document_loader.document_loader_pypdf import DocumentLoaderPyPdf
+
+cwd = os.getcwd()
+load_dotenv()
+
+# Arrange
+loader = DocumentLoaderPyPdf()
+test_file_path = os.path.join(cwd, "files", "CV_Candidate.pdf")
+
+
+def test_load_content_from_file():
+    # Act
+    content = loader.load_content_from_file(test_file_path)
+
+    # Join the extracted lines into a single string
+    content_text = " ".join(content["text"])
+
+    # Assert
+    assert content is not None
+    assert "University of New York" in content_text
+    assert "XYZ Innovations" in content_text
diff --git a/tests/files/CV_Candidate.pdf b/tests/files/CV_Candidate.pdf
new file mode 100644
index 0000000..2578c85
Binary files /dev/null and b/tests/files/CV_Candidate.pdf differ
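
---

For orientation (not part of the patch), below is a minimal sketch of how the new pieces introduced by this diff compose: the PyPDF loader, passing an LLM object to load_llm, routing with fallbacks via LLM.load_router, and the new content argument to extract. The contract fields, the PDF path, and the exact model choices are illustrative assumptions, not values taken from this diff.

import os

from dotenv import load_dotenv
from litellm import Router
from pydantic import Field

from extract_thinker import Contract, DocumentLoaderPyPdf, Extractor
from extract_thinker.llm import LLM

load_dotenv()


class InvoiceContract(Contract):
    # Hypothetical contract, defined only for this sketch.
    invoice_number: str = Field(description="Invoice number")
    total_amount: float = Field(description="Total amount due")


# A router with one primary model and one fallback, mirroring config_router() above.
# Fallback names must match model_name entries in model_list.
router = Router(
    model_list=[
        {
            "model_name": "groq/llama3-8b-8192",
            "litellm_params": {
                "model": "groq/llama3-8b-8192",
                "api_key": os.getenv("GROQ_API_KEY"),
            },
        },
        {
            "model_name": "claude-3-haiku-20240307",
            "litellm_params": {
                "model": "claude-3-haiku-20240307",
                "api_key": os.getenv("CLAUDE_API_KEY"),
            },
        },
    ],
    default_fallbacks=["claude-3-haiku-20240307"],
)

llm = LLM("groq/llama3-8b-8192")
llm.load_router(router)  # requests now go through the router instead of the instructor client

extractor = Extractor()
extractor.load_document_loader(DocumentLoaderPyPdf())
extractor.load_llm(llm)  # load_llm now accepts an LLM object as well as a model string

# The `content` argument is forwarded to the model as an "##Extra Content" user message.
result = extractor.extract(
    "path/to/invoice.pdf",  # placeholder path
    InvoiceContract,
    content="Amounts are in EUR; extract them as plain numbers.",
)
print(result.json())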