Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crawl4ai/__version__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# crawl4ai/__version__.py

# This is the version that will be used for stable releases
__version__ = "0.8.6"
__version__ = "0.8.7a5"

# For nightly builds, this gets set during build process
__nightly_version__ = None
Expand Down
6 changes: 3 additions & 3 deletions crawl4ai/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from crawl4ai.browser_profiler import ShrinkLevel, _format_size
from crawl4ai.config import USER_SETTINGS
from crawl4ai.cloud import cloud_cmd
from litellm import completion
from nanollm import completion
from pathlib import Path


Expand Down Expand Up @@ -66,7 +66,7 @@ def setup_llm_config() -> tuple[str, str]:
if not provider:
click.echo("\nNo default LLM provider configured.")
click.echo("Provider format: 'company/model' (e.g., 'openai/gpt-4o', 'anthropic/claude-3-sonnet')")
click.echo("See available providers at: https://docs.litellm.ai/docs/providers")
click.echo("See available providers at: https://github.com/unclecode/nanollm#supported-providers")
provider = click.prompt("Enter provider")

if not provider.startswith("ollama/"):
Expand Down Expand Up @@ -344,7 +344,7 @@ def show_examples():
- cohere/command
- google/gemini-pro

See full list of providers: https://docs.litellm.ai/docs/providers
See full list of providers: https://github.com/unclecode/nanollm#supported-providers

# Set default LLM provider and token in advance
crwl config set DEFAULT_LLM_PROVIDER "anthropic/claude-3-sonnet"
Expand Down
6 changes: 3 additions & 3 deletions crawl4ai/legacy/llmtxt.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from litellm import batch_completion
from nanollm import batch_completion
from .async_logger import AsyncLogger
import litellm
import nanollm
import pickle
import hashlib # <--- ADDED for file-hash
import glob

litellm.set_verbose = False
nanollm.set_verbose = False


def _compute_file_hash(file_path: Path) -> str:
Expand Down
22 changes: 11 additions & 11 deletions crawl4ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1774,10 +1774,10 @@ def perform_completion_with_backoff(
dict: The API response or an error message after all retries.
"""

from litellm import completion
from litellm.exceptions import RateLimitError
import litellm
litellm.drop_params = True # Auto-drop unsupported params (e.g., temperature for O-series/GPT-5)
from nanollm import completion
from nanollm.exceptions import RateLimitError
import nanollm
nanollm.drop_params = True # Auto-drop unsupported params (e.g., temperature for O-series/GPT-5)

extra_args = {"temperature": 0.01, "api_key": api_token, "base_url": base_url}
if json_response:
Expand Down Expand Up @@ -1866,11 +1866,11 @@ async def aperform_completion_with_backoff(
dict: The API response or an error message after all retries.
"""

from litellm import acompletion
from litellm.exceptions import RateLimitError
import litellm
from nanollm import acompletion
from nanollm.exceptions import RateLimitError
import nanollm
import asyncio
litellm.drop_params = True # Auto-drop unsupported params (e.g., temperature for O-series/GPT-5)
nanollm.drop_params = True # Auto-drop unsupported params (e.g., temperature for O-series/GPT-5)

extra_args = {"temperature": 0.01, "api_key": api_token, "base_url": base_url}
if json_response:
Expand Down Expand Up @@ -1991,7 +1991,7 @@ def extract_blocks_batch(batch_data, provider="groq/llama3-70b-8192", api_token=
"""

api_token = os.getenv("GROQ_API_KEY", None) if not api_token else api_token
from litellm import batch_completion
from nanollm import batch_completion

messages = []

Expand Down Expand Up @@ -3566,9 +3566,9 @@ async def get_text_embeddings(
if not texts:
return np.array([])

# If LLMConfig provided, use litellm for embeddings
# If LLMConfig provided, use nanollm for embeddings
if llm_config is not None:
from litellm import aembedding
from nanollm import aembedding

# Get embedding model from config or use default
embedding_model = llm_config.get('provider', 'text-embedding-3-small')
Expand Down
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ dependencies = [
"aiosqlite~=0.20",
"anyio>=4.0.0",
"lxml~=5.3",
"unclecode-litellm==1.81.13",
"nanollm @ git+https://github.com/hafezparast/nanollm-final.git@8a8d93b02a77f04a32f202620d95c52ad26c7d05",
"numpy>=1.26.0,<3",
"pillow>=10.4",
"playwright>=1.49.0",
Expand Down Expand Up @@ -46,7 +46,9 @@ dependencies = [
"humanize>=4.10.0",
"lark>=1.2.2",
"alphashape>=1.3.1",
"shapely>=2.0.0"
"shapely>=2.0.0",
"packaging>=21.0",
"tiktoken>=0.5.0"
]
classifiers = [
"Development Status :: 4 - Beta",
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ aiohttp>=3.11.11
aiosqlite~=0.20
anyio>=4.0.0
lxml~=5.3
unclecode-litellm==1.81.13
nanollm @ git+https://github.com/hafezparast/nanollm-final.git@8a8d93b02a77f04a32f202620d95c52ad26c7d05
numpy>=1.26.0,<3
pillow>=10.4
playwright>=1.49.0
Expand Down