Stream Large Files¶
Learn how to download large files efficiently by streaming them in chunks instead of loading them into memory.
Streaming Basics¶
Use return_type="stream" with stream_callback to process responses incrementally:
from fastreq import fastreq

def stream_handler(response, url):
    """Handle streaming response."""
    content_length = int(response.headers.get('content-length', 0))
    downloaded = 0
    with open(f"output_{url.split('/')[-1]}", "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)
                downloaded += len(chunk)
                print(f"Downloaded: {downloaded}/{content_length} bytes")

results = fastreq(
    urls=[
        "https://httpbin.org/bytes/10240",
        "https://httpbin.org/bytes/20480",
    ],
    return_type="stream",
    stream_callback=stream_handler,
)
Downloading with Progress Tracking¶
Track download progress for multiple files:
from fastreq import fastreq
import os

def download_with_progress(response, url):
    filename = os.path.basename(url)
    total_size = int(response.headers.get('content-length', 0))
    downloaded = 0
    print(f"Starting: {filename} ({total_size} bytes)")
    with open(filename, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)
                downloaded += len(chunk)
                if total_size > 0:
                    percent = (downloaded / total_size) * 100
                    print(f"{filename}: {percent:.1f}%")
    print(f"Completed: {filename}")

results = fastreq(
    urls=[
        "https://httpbin.org/bytes/10485760",  # 10 MB
        "https://httpbin.org/bytes/20971520",  # 20 MB
    ],
    return_type="stream",
    stream_callback=download_with_progress,
)
Processing Streaming Responses Line by Line¶
Process large responses line by line (e.g., CSV, JSON lines):
from fastreq import fastreq
import json

def process_jsonl(response, url):
    """Process JSON Lines format."""
    line_count = 0
    for line in response.iter_lines():
        if line:
            data = json.loads(line)
            line_count += 1
            print(f"Line {line_count}: {data}")
    print(f"Processed {line_count} lines from {url}")

results = fastreq(
    urls=[
        "https://api.example.com/data.jsonl",
    ],
    return_type="stream",
    stream_callback=process_jsonl,
)
Memory Efficiency Comparison¶
Non-Streaming (High Memory Usage)¶
from fastreq import fastreq

# Loads the entire response into memory
results = fastreq(
    urls=["https://example.com/large-file.zip"],  # 1 GB file
    return_type="content",
)
# Memory: ~1 GB
Streaming (Low Memory Usage)¶
from fastreq import fastreq

def save_stream(response, url):
    with open("large-file.zip", "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)

# Processes in 8 KB chunks
results = fastreq(
    urls=["https://example.com/large-file.zip"],
    return_type="stream",
    stream_callback=save_stream,
)
# Memory: ~8 KB (chunk size)
Parallel File Downloads¶
Download multiple files simultaneously with progress tracking:
from fastreq import fastreq
from tqdm import tqdm  # pip install tqdm
import os

files = [
    "https://httpbin.org/bytes/10485760",  # 10 MB
    "https://httpbin.org/bytes/20971520",  # 20 MB
    "https://httpbin.org/bytes/52428800",  # 50 MB
]

def download_with_tqdm(response, url):
    filename = f"download_{os.path.basename(url)}"
    total_size = int(response.headers.get('content-length', 0))
    with tqdm(
        total=total_size,
        unit='B',
        unit_scale=True,
        desc=filename,
    ) as pbar, open(filename, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)
                pbar.update(len(chunk))

results = fastreq(
    urls=files,
    concurrency=3,
    return_type="stream",
    stream_callback=download_with_tqdm,
    verbose=False,  # Disable the default progress bar
)
Filtering Streaming Content¶
Filter content while streaming:
from fastreq import fastreq
import os

def filter_lines(response, url):
    """Filter lines containing the 'error' keyword."""
    with open(f"filtered_{os.path.basename(url)}", "w") as f:
        for line in response.iter_lines():
            if line:
                text = line.decode('utf-8')
                if 'error' in text.lower():
                    f.write(text + '\n')

results = fastreq(
    urls=["https://api.example.com/logs.txt"],
    return_type="stream",
    stream_callback=filter_lines,
)
Resumable Downloads¶
Implement resumable downloads with Range headers:
from fastreq import fastreq
import os

def resumable_download(response, url):
    filename = "large-file.bin"
    downloaded_size = 0
    # Check if a partial file exists and get its size
    if os.path.exists(filename):
        downloaded_size = os.path.getsize(filename)
        print(f"Resuming from {downloaded_size} bytes")
    mode = 'ab' if downloaded_size > 0 else 'wb'
    with open(filename, mode) as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:
                f.write(chunk)
    print(f"Download complete: {os.path.getsize(filename)} bytes")

# Note: a matching Range header (e.g. bytes=<offset>-) must be set in the
# request configuration; otherwise the server sends the whole file and
# appending duplicates data.
results = fastreq(
    urls=["https://example.com/large-file.bin"],
    return_type="stream",
    stream_callback=resumable_download,
)
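The Range header itself is standard HTTP; since this guide does not show fastreq's option for per-request headers, the sketch below uses the requests library directly to illustrate the request side. Treat the wiring, not the header logic, as the part to adapt:

import os
import requests

filename = "large-file.bin"
# Resume from however many bytes are already on disk
offset = os.path.getsize(filename) if os.path.exists(filename) else 0

resp = requests.get(
    "https://example.com/large-file.bin",
    headers={"Range": f"bytes={offset}-"},
    stream=True,
)

# 206 Partial Content means the server honored the Range request;
# a plain 200 means it sent the whole file, so start over
mode = "ab" if resp.status_code == 206 else "wb"
with open(filename, mode) as f:
    for chunk in resp.iter_content(chunk_size=8192):
        if chunk:
            f.write(chunk)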
Chunked Upload with Streaming¶
Stream large files during upload:
from fastreq import fastreq

def stream_large_file(filepath):
    """Generator for streaming file chunks."""
    with open(filepath, 'rb') as f:
        while chunk := f.read(8192):
            yield chunk

# Note: This requires backend-specific implementation.
# Check the documentation for chunked upload support.
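For reference, the requests backend can consume such a generator directly: passing it as the data argument sends the body with chunked transfer encoding, so the file is never fully loaded into memory. This is a sketch against requests itself, not a fastreq API:

import requests

def stream_large_file(filepath):
    """Generator for streaming file chunks."""
    with open(filepath, 'rb') as f:
        while chunk := f.read(8192):
            yield chunk

# A generator body is sent with Transfer-Encoding: chunked
resp = requests.post(
    "https://example.com/upload",
    data=stream_large_file("large-file.bin"),
)
print(resp.status_code)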
Error Handling with Streaming¶
Handle errors during streaming:
from fastreq import fastreq
import os

def safe_stream(response, url):
    try:
        with open(f"output_{os.path.basename(url)}", "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
        print(f"Downloaded: {url}")
    except Exception as e:
        print(f"Error downloading {url}: {e}")
        raise

results = fastreq(
    urls=["https://httpbin.org/bytes/10240"],
    return_type="stream",
    stream_callback=safe_stream,
)
Streaming with Different Backends¶
niquests (Recommended)¶
results = fastreq(
    urls=["https://example.com/large-file.zip"],
    backend="niquests",
    return_type="stream",
    stream_callback=save_stream,
)
aiohttp¶
results = fastreq(
    urls=["https://example.com/large-file.zip"],
    backend="aiohttp",
    return_type="stream",
    stream_callback=save_stream,
)
requests¶
results = fastreq(
    urls=["https://example.com/large-file.zip"],
    backend="requests",
    return_type="stream",
    stream_callback=save_stream,
)
Best Practices¶
- Use Appropriate Chunk Sizes: 8 KB - 64 KB is typical.
- Check Content-Length: read the file size up front so progress can be reported.
- Handle Empty Chunks: filter out keep-alive chunks before writing.
- Use Streaming for Large Files: streaming only pays off above roughly 1 MB; for smaller responses, return_type="content" is simpler.
- Close Resources: always close file handles; a with statement does this even when an error is raised.
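A minimal callback sketch that combines all five practices (the 64 KB chunk size and output file naming are illustrative choices, not fastreq defaults):

from fastreq import fastreq
import os

def best_practice_stream(response, url):
    # Check Content-Length so completeness can be verified afterwards
    total = int(response.headers.get('content-length', 0))
    downloaded = 0
    filename = f"output_{os.path.basename(url)}"
    # The with statement closes the file even if iteration fails
    with open(filename, "wb") as f:
        for chunk in response.iter_content(chunk_size=65536):  # 64 KB chunks
            if not chunk:  # skip keep-alive chunks
                continue
            f.write(chunk)
            downloaded += len(chunk)
    if total and downloaded != total:
        print(f"Warning: {filename} incomplete ({downloaded}/{total} bytes)")

results = fastreq(
    urls=["https://httpbin.org/bytes/10485760"],  # 10 MB
    return_type="stream",
    stream_callback=best_practice_stream,
)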
See Also¶
- Make Parallel Requests - Basic request configuration
- Limit Request Rate - Control download rate
- API Reference - Configuration options