diff --git a/crawl4ai/__version__.py b/crawl4ai/__version__.py index 9b49905e7..352aa28ab 100644 --- a/crawl4ai/__version__.py +++ b/crawl4ai/__version__.py @@ -1,7 +1,7 @@ # crawl4ai/__version__.py # This is the version that will be used for stable releases -__version__ = "0.8.6" +__version__ = "0.8.7a5" # For nightly builds, this gets set during build process __nightly_version__ = None diff --git a/crawl4ai/cli.py b/crawl4ai/cli.py index 02b67155e..e20d22929 100644 --- a/crawl4ai/cli.py +++ b/crawl4ai/cli.py @@ -35,7 +35,7 @@ from crawl4ai.browser_profiler import ShrinkLevel, _format_size from crawl4ai.config import USER_SETTINGS from crawl4ai.cloud import cloud_cmd -from litellm import completion +from nanollm import completion from pathlib import Path @@ -66,7 +66,7 @@ def setup_llm_config() -> tuple[str, str]: if not provider: click.echo("\nNo default LLM provider configured.") click.echo("Provider format: 'company/model' (e.g., 'openai/gpt-4o', 'anthropic/claude-3-sonnet')") - click.echo("See available providers at: https://docs.litellm.ai/docs/providers") + click.echo("See available providers at: https://github.com/unclecode/nanollm#supported-providers") provider = click.prompt("Enter provider") if not provider.startswith("ollama/"): @@ -344,7 +344,7 @@ def show_examples(): - cohere/command - google/gemini-pro - See full list of providers: https://docs.litellm.ai/docs/providers + See full list of providers: https://github.com/unclecode/nanollm#supported-providers # Set default LLM provider and token in advance crwl config set DEFAULT_LLM_PROVIDER "anthropic/claude-3-sonnet" diff --git a/crawl4ai/legacy/llmtxt.py b/crawl4ai/legacy/llmtxt.py index 302564165..a06975b87 100644 --- a/crawl4ai/legacy/llmtxt.py +++ b/crawl4ai/legacy/llmtxt.py @@ -11,14 +11,14 @@ from nltk.tokenize import word_tokenize from nltk.corpus import stopwords from nltk.stem import WordNetLemmatizer -from litellm import batch_completion +from nanollm import batch_completion from 
.async_logger import AsyncLogger -import litellm +import nanollm import pickle import hashlib # <--- ADDED for file-hash import glob -litellm.set_verbose = False +nanollm.set_verbose = False def _compute_file_hash(file_path: Path) -> str: diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py index 4b3d96906..962835b5d 100644 --- a/crawl4ai/utils.py +++ b/crawl4ai/utils.py @@ -1774,10 +1774,10 @@ def perform_completion_with_backoff( dict: The API response or an error message after all retries. """ - from litellm import completion - from litellm.exceptions import RateLimitError - import litellm - litellm.drop_params = True # Auto-drop unsupported params (e.g., temperature for O-series/GPT-5) + from nanollm import completion + from nanollm.exceptions import RateLimitError + import nanollm + nanollm.drop_params = True # Auto-drop unsupported params (e.g., temperature for O-series/GPT-5) extra_args = {"temperature": 0.01, "api_key": api_token, "base_url": base_url} if json_response: @@ -1866,11 +1866,11 @@ async def aperform_completion_with_backoff( dict: The API response or an error message after all retries. 
""" - from litellm import acompletion - from litellm.exceptions import RateLimitError - import litellm + from nanollm import acompletion + from nanollm.exceptions import RateLimitError + import nanollm import asyncio - litellm.drop_params = True # Auto-drop unsupported params (e.g., temperature for O-series/GPT-5) + nanollm.drop_params = True # Auto-drop unsupported params (e.g., temperature for O-series/GPT-5) extra_args = {"temperature": 0.01, "api_key": api_token, "base_url": base_url} if json_response: @@ -1991,7 +1991,7 @@ def extract_blocks_batch(batch_data, provider="groq/llama3-70b-8192", api_token= """ api_token = os.getenv("GROQ_API_KEY", None) if not api_token else api_token - from litellm import batch_completion + from nanollm import batch_completion messages = [] @@ -3566,9 +3566,9 @@ async def get_text_embeddings( if not texts: return np.array([]) - # If LLMConfig provided, use litellm for embeddings + # If LLMConfig provided, use nanollm for embeddings if llm_config is not None: - from litellm import aembedding + from nanollm import aembedding # Get embedding model from config or use default embedding_model = llm_config.get('provider', 'text-embedding-3-small') diff --git a/pyproject.toml b/pyproject.toml index ee237d5b3..6f1d04596 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ "aiosqlite~=0.20", "anyio>=4.0.0", "lxml~=5.3", - "unclecode-litellm==1.81.13", + "nanollm @ git+https://github.com/hafezparast/nanollm-final.git@8a8d93b02a77f04a32f202620d95c52ad26c7d05", "numpy>=1.26.0,<3", "pillow>=10.4", "playwright>=1.49.0", @@ -46,7 +46,9 @@ dependencies = [ "humanize>=4.10.0", "lark>=1.2.2", "alphashape>=1.3.1", - "shapely>=2.0.0" + "shapely>=2.0.0", + "packaging>=21.0", + "tiktoken>=0.5.0" ] classifiers = [ "Development Status :: 4 - Beta", diff --git a/requirements.txt b/requirements.txt index 9686ffc95..17b1a18a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ aiohttp>=3.11.11 aiosqlite~=0.20 
anyio>=4.0.0 lxml~=5.3 -unclecode-litellm==1.81.13 +nanollm @ git+https://github.com/hafezparast/nanollm-final.git@8a8d93b02a77f04a32f202620d95c52ad26c7d05 numpy>=1.26.0,<3 pillow>=10.4 playwright>=1.49.0