Skip to content

Commit

Permalink
Make sure cache paths are not too long
Browse files Browse the repository at this point in the history
  • Loading branch information
whimo committed May 19, 2024
1 parent 0fc4954 commit 10e9265
Show file tree
Hide file tree
Showing 45 changed files with 43 additions and 39 deletions.
3 changes: 0 additions & 3 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@ jobs:
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
- name: Support longpaths (for integration tests cache on Windows)
run: git config --system core.longpaths true

- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
Expand Down
3 changes: 0 additions & 3 deletions .github/workflows/integration_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@ jobs:
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
- name: Support longpaths (for integration tests cache on Windows)
run: git config --system core.longpaths true

- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
Expand Down
3 changes: 0 additions & 3 deletions .github/workflows/integration_test_minimal.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@ jobs:
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
- name: Support longpaths (for integration tests cache on Windows)
run: git config --system core.longpaths true

- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
Expand Down
3 changes: 0 additions & 3 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ jobs:
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
- name: Support longpaths (for integration tests cache on Windows)
run: git config --system core.longpaths true

- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
Expand Down
3 changes: 0 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@ jobs:
os: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
steps:
- name: Support longpaths (for integration tests cache on Windows)
run: git config --system core.longpaths true

- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
Expand Down
29 changes: 19 additions & 10 deletions motleycrew/caching/http_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@
from curl_cffi.requests import AsyncSession as CurlCFFI__AsyncSession
from curl_cffi.requests import Headers as CurlCFFI__Headers

from .utils import recursive_hash, hash_code, FakeRLock
from .utils import recursive_hash, shorten_filename, FakeRLock

CACHE_WHITELIST = []
CACHE_BLACKLIST = [
"*//api.lunary.ai/*",
]

CACHE_FILENAME_LENGTH_LIMIT = 120


class CacheException(Exception):
"""Exception for caching process"""
Expand Down Expand Up @@ -128,7 +130,14 @@ def get_cache_file(self, func: Callable, *args, **kwargs) -> Union[tuple, None]:

# check or create cache dirs
root_dir = Path(self.root_cache_dir)
cache_dir = root_dir / url_parsed.hostname / url_parsed.path.strip("/").replace("/", "_")

cache_dir = (
root_dir
/ shorten_filename(url_parsed.hostname, length=CACHE_FILENAME_LENGTH_LIMIT)
/ shorten_filename(
url_parsed.path.strip("/").replace("/", "_"), length=CACHE_FILENAME_LENGTH_LIMIT
)
)
cache_dir.mkdir(parents=True, exist_ok=True)

# Convert args to a dictionary based on the function's signature
Expand All @@ -142,14 +151,14 @@ def get_cache_file(self, func: Callable, *args, **kwargs) -> Union[tuple, None]:
kwargs_clone.pop(param, None)

# Create hash based on argument names, argument values, and function source code
func_source_code_hash = hash_code(inspect.getsource(func))
arg_hash = (
recursive_hash(args_dict, ignore_params=self.ignore_params)
+ recursive_hash(kwargs_clone, ignore_params=self.ignore_params)
+ func_source_code_hash
)

cache_file = cache_dir / "{}.pkl".format(arg_hash)
hashing_base = {
"args": args_dict,
"kwargs": kwargs_clone,
"func_source_code": inspect.getsource(func),
}
call_hash = recursive_hash(hashing_base)

cache_file = cache_dir / "{}.pkl".format(call_hash)
return cache_file, url

def get_response(self, func: Callable, *args, **kwargs) -> Any:
Expand Down
34 changes: 22 additions & 12 deletions motleycrew/caching/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,16 @@ def release(self):
def recursive_hash(value, depth=0, ignore_params=[]):
"""Hash primitives recursively with maximum depth."""
if depth > MAX_DEPTH:
return hashlib.md5("max_depth_reached".encode()).hexdigest()
return hashlib.sha256("max_depth_reached".encode()).hexdigest()

if isinstance(value, (int, float, str, bool, bytes)):
return hashlib.md5(str(value).encode()).hexdigest()
return hashlib.sha256(str(value).encode()).hexdigest()
elif isinstance(value, (list, tuple)):
return hashlib.md5(
"".join(
[recursive_hash(item, depth + 1, ignore_params) for item in value]
).encode()
return hashlib.sha256(
"".join([recursive_hash(item, depth + 1, ignore_params) for item in value]).encode()
).hexdigest()
elif isinstance(value, dict):
return hashlib.md5(
return hashlib.sha256(
"".join(
[
recursive_hash(key, depth + 1, ignore_params)
Expand All @@ -41,8 +39,20 @@ def recursive_hash(value, depth=0, ignore_params=[]):
elif hasattr(value, "__dict__") and value.__class__.__name__ not in ignore_params:
return recursive_hash(value.__dict__, depth + 1, ignore_params)
else:
return hashlib.md5("unknown".encode()).hexdigest()


def hash_code(code):
return hashlib.md5(code.encode()).hexdigest()
return hashlib.sha256("unknown".encode()).hexdigest()


def shorten_filename(filename, length, hash_length=64):
    """Collapse the middle of an over-long filename into a SHA-256 digest.

    Names of at most ``length`` characters are returned unchanged. Longer
    names become ``<head>_<digest>_<tail>``, where ``head`` and ``tail`` are
    the first and last characters of the original name (kept for
    readability) and ``digest`` is the first ``hash_length`` hex characters
    of the SHA-256 of the full name (which keeps distinct names distinct).

    NOTE: the two ``_`` separators are not counted against ``length``, so a
    shortened name is ``length + 2`` characters long, not ``length``. The
    output format is deliberately left as is: changing it would rename every
    path derived from this function.

    Args:
        filename: The name to shorten.
        length: Names longer than this are shortened. Must be greater than
            ``hash_length + 2``.
        hash_length: Number of hex digest characters to embed. A SHA-256 hex
            digest has 64 characters, so larger values embed only 64.

    Returns:
        ``filename`` itself if it is short enough, otherwise the shortened
        name.

    Raises:
        ValueError: If ``length`` is not greater than ``hash_length + 2``.
    """
    # Explicit raise instead of ``assert``: asserts are removed under
    # ``python -O``, which would let a too-small ``length`` through and
    # produce empty or overlapping head/tail slices.
    if length <= hash_length + 2:
        raise ValueError("Length should be greater than hash length + 2")

    if len(filename) <= length:
        return filename

    digest = hashlib.sha256(filename.encode()).hexdigest()[:hash_length]
    head = filename[: length // 2 - hash_length // 2]
    # ``-length // 2`` floors towards negative infinity, so for an odd
    # ``length`` the tail is one character longer than the head.
    tail = filename[-length // 2 + hash_length // 2 :]
    return "{}_{}_{}".format(head, digest, tail)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/itest_golden_data/delegation_crewai.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"**Title: Unveiling 2023's Game-Changing AI Innovations: A Leap into the Future**\n\n**Introduction:**\nAs we navigate through 2023, the landscape of artificial intelligence (AI) continues to evolve at an exhilarating pace. From groundbreaking models to innovative applications, AI is not just reshaping industries; it's redefining the very fabric of how we interact with technology. Let's dive into some of the most significant AI advancements this year that are setting the stage for a smarter tomorrow.\n\n**Gemini Ultra: The New Frontier in Language Models:**\nOne of the most remarkable breakthroughs this year has been the introduction of Gemini Ultra, a large language model that has taken the AI community by storm. Achieving a record-breaking score of 90.04% on the MMLU benchmark, Gemini Ultra has demonstrated capabilities that surpass even human experts. This model represents a significant leap forward in our quest to develop AI that can understand and generate human-like text, opening new avenues for AI applications in education, customer service, and beyond.\n\n**Generative AI: Transforming Business Landscapes:**\nThe rise of generative AI tools has been another highlight of the year. A recent McKinsey Global Survey revealed that a third of businesses are now regularly using generative AI to enhance their operations. These tools are not just about automating tasks; they are about creating new ways to engage customers, streamline decision-making, and foster innovation. As AI continues to permeate various sectors, the potential for transformative change is immense, making it an exciting time for tech enthusiasts and industry professionals alike.\n\n**Ethical AI: Navigating the New Challenges:**\nWith great power comes great responsibility. As AI technologies become more integrated into our daily lives, the ethical considerations surrounding their use have become more pressing. 
From privacy concerns to bias in AI algorithms, the community is actively engaging in discussions and research to ensure that AI development is aligned with ethical standards. This ongoing dialogue is crucial as it guides the responsible deployment of AI technologies, ensuring they benefit society as a whole.\n\n**Conclusion:**\nThe advancements in AI in 2023 are not just technological achievements; they are milestones that are shaping the future of how we interact with machines. As we continue to explore the vast potentials of AI, staying informed and engaged with these developments is essential for anyone keen on technology. The journey of AI is far from over, and what we see today is just the tip of the iceberg. The future is here, and it's powered by AI.\n\nThis blog post aims to capture the essence of the latest AI advancements in a manner that is both informative and captivating for those with a keen interest in technology."
"**Title: The Future is Now: Unpacking 2023's Coolest AI Breakthroughs**\n\n**Introduction**\nWelcome to the cutting edge of technology! 2023 has been a landmark year for artificial intelligence, pushing boundaries and redefining what machines can do. From creating videos out of thin air to understanding and processing the world in multiple data formats, let's dive into the coolest AI advancements that are setting the stage for a futuristic tomorrow.\n\n**Multimodal AI: A Symphony of Data**\nImagine an AI that doesn\u2019t just understand text or images but can interpret them all - voice, video, images, and even code. This year, the rise of multimodal AI has shown us a glimpse of a future where AI can handle complex tasks that require understanding different types of data simultaneously. This isn't just an improvement; it's a revolution in how AI perceives the world, making it more similar to how humans process information.\n\n**Google's Veo: Painting Visions with Words**\nEver thought about describing a scene and letting AI create a video for you? Google's latest marvel, Veo, turns this fantasy into reality. By simply providing text prompts, Veo can generate high-definition video clips. This tool is not just a new toy for creatives but a powerful asset for educators, marketers, and storytellers, offering endless possibilities in video content creation.\n\n**GPT-4: Smarter, Faster, and More Ethical**\nOpenAI continues to impress with its GPT series, and GPT-4 has taken the spotlight this year. With enhanced capabilities to handle a diverse range of data types through a unified interface, GPT-4 is not just smarter but also faster and more ethically aware. Whether it's writing, coding, or even basic reasoning, GPT-4 is on its way to becoming an indispensable tool for professionals across various industries.\n\n**Conclusion**\nAs we look at these incredible advancements, it's clear that AI is not just evolving; it's thriving. 
These developments promise not only to enhance technological applications but also to improve the quality of our daily lives. The future of AI is here, and it's more exciting than ever. Stay tuned, because this journey into the world of artificial intelligence is just beginning!\n\nThis blog post aims to capture the essence of the latest AI advancements in a manner that is both informative and exciting, making complex technologies relatable and engaging for a tech-savvy audience."
2 changes: 1 addition & 1 deletion tests/itest_golden_data/single_llama_index.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"The advancements in AI in 2024 are marked by significant trends and breakthrough technologies that have the potential to impact various industries profoundly. Here is a comprehensive analysis report:\n\n**1. Key Trends in AI for 2024:**\n - **Explosive Growth of Generative AI and Multimodal AI:** These technologies are becoming more accessible and useful for the general public, not just tech enthusiasts. This trend indicates a shift towards more creative and diverse applications of AI across different media types.\n - **Quantum AI Emergence:** Quantum computing is beginning to integrate with AI, promising substantial leaps in processing power and capabilities.\n - **Explainable AI (XAI):** There's a growing emphasis on making AI systems more transparent and understandable, which is crucial for trust and regulatory compliance.\n - **Evolving Landscape of Edge AI:** This involves processing AI algorithms locally on devices, reducing latency and privacy concerns, which is crucial for applications like autonomous vehicles and IoT devices.\n - **AI Governance:** As AI becomes more pervasive, there is an increasing focus on governance and regulation to ensure ethical use and mitigate risks.\n - **AI and Sustainability:** There is a notable intersection of AI with sustainability efforts, aiming to tackle environmental challenges using AI-driven solutions.\n\n**2. Breakthrough Technologies:**\n - **Generative AI:** Reaching new heights, generative AI is set to revolutionize content creation, design, and interaction by enabling more sophisticated and nuanced outputs.\n - **Multimodal AI:** This technology combines different forms of data (e.g., text, image, sound) to create more comprehensive AI models that better mimic human sensory and cognitive capabilities.\n - **Quantum AI and Edge AI:** These technologies are expected to drastically enhance computational efficiencies and enable real-time AI applications in ways previously not possible.\n\n**3. 
Potential Industry Impacts:**\n - **Healthcare:** Improved diagnostic tools, personalized medicine, and efficient patient management systems.\n - **Automotive:** Advancements in autonomous driving technologies and better integration of AI in vehicle management systems.\n - **Finance:** Enhanced fraud detection systems, algorithmic trading, and personalized customer services.\n - **Entertainment and Media:** Revolutionized content creation processes and personalized user experiences.\n - **Manufacturing:** Increased automation, improved quality control, and optimized supply chain management.\n\nIn conclusion, the landscape of AI in 2024 is characterized by a blend of technological innovation and increasing integration into everyday life, promising significant transformations across multiple sectors."
"**Comprehensive Analysis Report on AI Advancements in 2024**\n\n**Introduction**\nIn 2024, artificial intelligence (AI) continues to evolve rapidly, impacting various industries and everyday life. This report delves into the latest advancements, identifying key trends, breakthrough technologies, and their potential impacts on industries.\n\n**1. Key Trends in AI**\n - **Generative and Multimodal AI**: There is an explosive growth in generative AI, which is becoming increasingly useful for regular, non-tech individuals. Multimodal AI, which integrates multiple types of data such as text, images, and sound, is also seeing significant advancements.\n - **Quantum AI**: Quantum computing is beginning to integrate with AI technologies, promising substantial leaps in processing power and capabilities.\n - **Explainable AI (XAI)**: Efforts to make AI systems more transparent and understandable, known as \"Black Box\" demystification, are gaining traction. This trend is crucial for increasing trust and manageability in AI applications.\n - **Edge AI**: The evolution of AI processing at the edge, closer to where data is collected, enhances real-time data processing without the latency associated with data transmission to distant servers.\n - **AI Governance**: As AI becomes more pervasive, governance frameworks are being strengthened to ensure ethical, secure, and fair use of AI technologies.\n - **AI and Sustainability**: The intersection of AI with sustainability efforts is emerging as a crucial area, with AI being leveraged to tackle challenges related to climate change and resource management.\n\n**2. Breakthrough Technologies**\n - **Open Source AI Models**: The adoption of open source pretrained AI models is on the rise. These models are empowering businesses to enhance productivity and cost-efficiency by integrating them with private or real-time data. Companies like IBM are actively contributing to and utilizing these open source models.\n\n**3. 
Industry Impacts**\n - **Business Productivity**: Businesses across various sectors are leveraging AI to improve efficiency and reduce operational costs. The integration of AI in daily business operations is becoming commonplace, driving innovation and competitive advantage.\n - **Data and Analytics Leadership**: There is a noticeable trend of organizations consolidating roles such as chief data and analytics officers, reflecting a strategic shift towards more integrated and centralized AI leadership.\n - **Personalization and Accessibility**: For the average consumer, AI is becoming more accessible and applicable in daily life, from personalized shopping experiences to enhanced home automation.\n\n**Conclusion**\nThe landscape of AI in 2024 is marked by significant advancements and shifts towards more practical, understandable, and integrated applications. As AI technologies continue to evolve, they promise to bring profound changes to how we live and work, making it essential for businesses and individuals to stay informed and adaptable to these changes."

0 comments on commit 10e9265

Please sign in to comment.