Version 2.3.2 (Complete refactor)
parent dcb57e5a2b
commit 9963a25e4a

.dockerignore | 10 (new file)
@@ -0,0 +1,10 @@
+.gitignore
+.dockerignore
+.git
+.archive
+.env
+.env.*
+logs
+Dockerfile
+README.md
+docker-compose.yaml

Dockerfile | 80
@@ -1,14 +1,72 @@
-FROM archlinux:base
+FROM python:3.11-slim AS builder

-# Update and install necessary packages
-RUN pacman -Sy --noconfirm curl reflector && \
-    reflector --latest 5 --sort rate --save /etc/pacman.d/mirrorlist && \
-    sed -i '/\[options\]/a XferCommand = /usr/bin/curl -C - --fail --retry 3 --retry-delay 3 -o %o %u' /etc/pacman.conf && \
-    pacman -Syu --noconfirm --needed texlive-basic texlive-bibtexextra texlive-bin texlive-binextra texlive-context texlive-fontsrecommended texlive-fontsextra texlive-fontutils texlive-formatsextra texlive-langenglish texlive-langeuropean texlive-langfrench texlive-langgerman texlive-latex texlive-latexextra texlive-latexrecommended texlive-luatex texlive-mathscience texlive-metapost texlive-music texlive-pictures texlive-plaingeneric texlive-pstricks texlive-publishers && \
-    pacman -Syu --noconfirm --needed python-fastapi uvicorn python-python-multipart && \
-    yes | pacman -Scc
-
-# Set working directory
 WORKDIR /app
-COPY main.py .
-
-ENTRYPOINT ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
+
+# Install build dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    build-essential \
+    gcc \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies
+COPY requirements.txt .
+RUN pip wheel --no-cache-dir --wheel-dir /app/wheels -r requirements.txt
+
+# Create final image
+FROM python:3.11-slim AS final
+
+# Set environment variables
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONPATH=/app \
+    PORT=8000 \
+    MAX_WORKERS=4 \
+    TZ=UTC \
+    JOBS_DIR=/data/jobs \
+    DB_PATH=/data/db/jobs.db
+
+# Create a non-root user
+RUN groupadd -r appuser && useradd -r -g appuser appuser
+
+# Install LaTeX and required dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    texlive-full \
+    tini \
+    sqlite3 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set working directory
+WORKDIR /app
+
+# Create persistent directories
+RUN mkdir -p /data/jobs /data/db /app/temp && \
+    chown -R appuser:appuser /app /data
+
+# Copy Python wheels from builder stage
+COPY --from=builder /app/wheels /wheels
+
+# Install Python dependencies
+RUN pip install --no-cache-dir /wheels/* && rm -rf /wheels
+
+# Copy application code
+COPY . /app/
+
+# Create entrypoint script that properly handles environment variables
+RUN echo '#!/bin/sh\n\
+exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers $MAX_WORKERS --log-level info\n\
+' > /app/entrypoint.sh && chmod +x /app/entrypoint.sh
+
+# Switch to non-root user
+USER appuser
+
+# Expose the service port
+EXPOSE 8000
+
+# Use tini as init
+ENTRYPOINT ["/usr/bin/tini", "--"]
+
+# Run the application with proper signal handling
+CMD ["/app/entrypoint.sh"]

README.md | 219
@@ -1,58 +1,213 @@
-# LaTeX-to-PDF Conversion Service
+# LaTeX to PDF Conversion Service

-This service provides an API endpoint to convert LaTeX documents into PDF format. It supports `.zip` file uploads containing the LaTeX source file (`main.tex`) and any associated files (e.g., images or additional `.tex` files).
+A high-performance, secure REST API for converting LaTeX documents to PDF format.

-## Getting Started
+## Features

-### Building the Docker Image
+- **Simple API**: Upload a ZIP file containing LaTeX documents and get a PDF back
+- **Secure Processing**: Comprehensive security measures including input validation and sanitization
+- **Multiple Workers**: Designed for concurrency with shared file system and SQLite database
+- **Robust Error Handling**: Detailed error messages with LaTeX compilation logs
+- **Automatic Cleanup**: Background process removes expired PDFs and temporary files
+- **Configurable Options**: Multiple compilation runs, BibTeX support, custom main file name
+- **API Key Authentication**: Optional security layer with configurable API keys
+- **Rate Limiting**: Protection against API abuse
+- **Resource Control**: Limits on file sizes and compilation time
+- **Docker Ready**: Ready-to-use Docker and Docker Compose configurations

-To build the Docker image for the conversion service, navigate to the project directory and run:
+## Quick Start
+
+The easiest way to run the service is with Docker Compose:

 ```bash
-docker build -t rbehzadan/tex2pdf .
+# Clone the repository
+git clone https://github.com/yourusername/tex2pdf.git
+cd tex2pdf
+
+# Start the service
+docker-compose up -d
 ```

-### Running the Service
+The service will be available at `http://localhost:8000`.

-After building the image, you can start the service with the following command:
+## API Usage
+
+### Convert LaTeX to PDF

 ```bash
-docker run -d -p 8000:8000 rbehzadan/tex2pdf
+curl -X POST \
+  -H "X-API-Key: 1234" \
+  -F "zip_file=@my_latex_files.zip" \
+  http://localhost:8000/tex2pdf
 ```

-This command runs the Docker container in the background (`-d`) and maps port `8000` of the container to port `8000` on the host, making the service accessible at `http://localhost:8000`.
+Response:
+
+```json
+{
+  "job_id": "28f5bf9b-587f-4f3c-a3de-4d737d9736ce",
+  "status": "processing",
+  "message": "Conversion job started"
+}
+```

-## API Endpoint
+### Check Job Status

-The service exposes a single POST endpoint at `/tex2pdf` for converting LaTeX to PDF.
-
-### Uploading a `.zip` File
-
-The `.zip` file should contain a `main.tex` file and can include additional resources such as images or other `.tex` files used by `main.tex`.
-
-## Manual Testing
-
-### Testing with `curl`
-
-To test the conversion service with `curl`, use the following command:

 ```bash
-curl -X POST -F "zip_file=@path/to/your/file.zip" http://localhost:8000/tex2pdf -o output.pdf
+curl -X GET \
+  -H "X-API-Key: 1234" \
+  http://localhost:8000/tex2pdf/status/28f5bf9b-587f-4f3c-a3de-4d737d9736ce
 ```

-Replace `path/to/your/file.zip` with the actual path to your `.zip` file. The resulting PDF will be saved as `output.pdf` in the current directory.
+Response:
+
+```json
+{
+  "job_id": "28f5bf9b-587f-4f3c-a3de-4d737d9736ce",
+  "status": "completed",
+  "created_at": 1741424390.6039968
+}
+```

-### Testing with HTTPie
+### Download PDF

-HTTPie offers a more user-friendly way to test the service. Use the following command for testing:

 ```bash
-http -f POST http://localhost:8000/tex2pdf zip_file@path/to/your/file.zip > output.pdf
+curl -X GET \
+  -H "X-API-Key: 1234" \
+  -o output.pdf \
+  http://localhost:8000/tex2pdf/download/28f5bf9b-587f-4f3c-a3de-4d737d9736ce
 ```

-As with `curl`, replace `path/to/your/file.zip` with the path to your `.zip` file. The output will be redirected to `output.pdf` in the current directory.
+### Health Check

-## Troubleshooting
+```bash
+curl http://localhost:8000/health
+```

-If you encounter any issues with the conversion process, ensure that your `.zip` file is structured correctly, with a `main.tex` file at the root. For more detailed error information, consult the service logs.
+Response:
+
+```json
+{
+  "status": "healthy",
+  "version": "1.0.0",
+  "database": "connected",
+  "storage": true
+}
+```
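
The three endpoints above can be chained into a single script. The sketch below is illustrative only: it assumes the sample API key `1234` from docker-compose.yaml and that `jq` is available for parsing the JSON responses.

```bash
#!/bin/sh
# Submit the job and capture the returned job_id
JOB_ID=$(curl -s -X POST \
  -H "X-API-Key: 1234" \
  -F "zip_file=@my_latex_files.zip" \
  http://localhost:8000/tex2pdf | jq -r '.job_id')

# Poll the status endpoint until the job finishes
while true; do
  STATUS=$(curl -s -H "X-API-Key: 1234" \
    http://localhost:8000/tex2pdf/status/$JOB_ID | jq -r '.status')
  if [ "$STATUS" = "completed" ] || [ "$STATUS" = "failed" ]; then
    break
  fi
  sleep 2
done

# Download the PDF once the job has completed
if [ "$STATUS" = "completed" ]; then
  curl -s -H "X-API-Key: 1234" -o output.pdf \
    http://localhost:8000/tex2pdf/download/$JOB_ID
fi
```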
+
+## Advanced Usage
+
+### Compilation Options
+
+You can customize the LaTeX compilation process:
+
+```bash
+curl -X POST \
+  -H "X-API-Key: 1234" \
+  -F "zip_file=@my_latex_files.zip" \
+  -F "options={\"main_file\": \"document.tex\", \"num_runs\": 3, \"use_bibtex\": true}" \
+  http://localhost:8000/tex2pdf
+```
+
+Options:
+- `main_file`: Main LaTeX file to compile (default: `main.tex`)
+- `num_runs`: Number of compilation runs (default: 2)
+- `use_bibtex`: Run BibTeX for bibliography processing (default: false)
+
+## ZIP File Requirements
+
+- The ZIP file must contain all necessary files for LaTeX compilation
+- By default, the service looks for `main.tex` as the main file
+- All referenced files (images, styles, etc.) should be included
+- Paths in LaTeX files should be relative and match the ZIP structure
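
As an illustration, a minimal upload built from a hypothetical project directory (file and directory names below are examples only) could be packaged like this:

```bash
# Expected layout inside the archive:
#   main.tex           <- main file compiled by default
#   references.bib     <- only needed when use_bibtex is enabled
#   images/figure1.png
# Zip from inside the project directory so paths stay relative:
cd my_paper && zip -r ../my_latex_files.zip .
```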
+
+## Configuration
+
+The service can be configured via environment variables in the docker-compose.yml file:
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `ALLOWED_API_KEYS` | Comma-separated list of valid API keys | "" (empty = no auth) |
+| `API_KEY_REQUIRED` | Enable/disable API key validation | "true" |
+| `MAX_WORKERS` | Number of uvicorn workers | 2 |
+| `MAX_UPLOAD_SIZE` | Maximum file upload size in bytes | 52428800 (50MB) |
+| `MAX_COMPILATION_TIME` | Maximum LaTeX compilation time in seconds | 240 |
+| `RATE_LIMIT_WINDOW` | Rate limiting window in seconds | 60 |
+| `MAX_REQUESTS_PER_WINDOW` | Maximum requests per rate limit window | 10 |
+| `JOB_EXPIRY` | Job expiry time in seconds | 3600 (1 hour) |
+| `JOBS_DIR` | Directory for storing PDF files | "/data/jobs" |
+| `DB_PATH` | Path to SQLite database | "/data/db/jobs.db" |
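
The same variables can also be set without Docker Compose, for example with plain `docker run` flags (the values shown are placeholders, not recommendations):

```bash
docker run -d -p 8000:8000 \
  -e ALLOWED_API_KEYS=replace-with-a-strong-key \
  -e MAX_UPLOAD_SIZE=104857600 \
  -e JOB_EXPIRY=7200 \
  -v pdf_data:/data \
  rbehzadan/tex2pdf
```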
+
+## Deployment
+
+### System Requirements
+
+- Docker and Docker Compose
+- For running without Docker:
+  - Python 3.10+
+  - LaTeX distribution (texlive)
+  - SQLite3
+
+### Production Deployment Considerations
+
+For production deployments, consider:
+
+1. **Configure a reverse proxy** (like Nginx) with HTTPS
+2. **Adjust resource limits** based on your workload
+3. **Set strong API keys** and restrict access
+4. **Mount persistent volumes** for job data
+5. **Monitor disk usage** and adjust `JOB_EXPIRY` accordingly
+6. **Set up logging** to a centralized logging service
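
For point 2, one low-effort option is to cap the container's resources directly at run time; the limits below are illustrative placeholders to adjust for your workload:

```bash
docker run -d -p 8000:8000 --memory=2g --cpus=2 \
  -v pdf_data:/data rbehzadan/tex2pdf
```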
+
+## Architecture
+
+The service uses a stateless design with background processing:
+
+1. **FastAPI Application**: Handles HTTP requests and responses
+2. **SQLite Database**: Stores job metadata and status
+3. **File System**: Stores generated PDFs and temporary files
+4. **Background Tasks**: Process LaTeX compilation asynchronously
+
+## Development
+
+### Local Development Setup
+
+```bash
+# Clone the repository
+git clone https://github.com/yourusername/tex2pdf.git
+cd tex2pdf
+
+# Create a virtual environment
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+# Install dependencies
+pip install -r requirements.txt
+
+# Run the service
+uvicorn main:app --reload --host 0.0.0.0 --port 8000
+```
+
+### Running Tests
+
+```bash
+pytest tests/
+```
+
+## License
+
+[MIT License](LICENSE)
+
+## Contributing
+
+Contributions are welcome! Please feel free to submit a Pull Request.
+
+## Security Considerations
+
+While this service implements several security measures:
+
+- API key authentication
+- Input validation
+- Rate limiting
+- Safe ZIP extraction
+- Process isolation
+
+Be aware that allowing users to run LaTeX compilation on your server carries inherent risks. Always deploy behind a secure gateway in production environments.

docker-compose.yaml | 21 (new file)
@@ -0,0 +1,21 @@
+services:
+  app:
+    image: rbehzadan/tex2pdf
+    container_name: tex2pdf
+    ports:
+      - "8000:8000"
+    environment:
+      - ALLOWED_API_KEYS=1234,5678,abcd  # Comma-separated list of allowed API keys
+      - API_KEY_REQUIRED=true  # Set to "false" to disable API key validation
+      - MAX_WORKERS=4  # Number of uvicorn workers
+      - MAX_UPLOAD_SIZE=52428800  # 50MB in bytes
+      - MAX_COMPILATION_TIME=240  # Maximum LaTeX compilation time in seconds
+      - RATE_LIMIT_WINDOW=60  # Rate limiting window in seconds
+      - MAX_REQUESTS_PER_WINDOW=10  # Maximum requests per rate limit window
+      - JOB_EXPIRY=3600  # Job expiry time in seconds (1 hour)
+    volumes:
+      - pdf_data:/data
+    restart: unless-stopped
+
+volumes:
+  pdf_data:

main.py | 738
@@ -1,60 +1,706 @@
-from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi import FastAPI, File, UploadFile, HTTPException, Depends, BackgroundTasks, Request
-from fastapi.responses import StreamingResponse
+from fastapi.responses import StreamingResponse, FileResponse
 from io import BytesIO
 import asyncio
 import tempfile
 import zipfile
 import os
+import logging
+import shutil
+import re
+import uuid
+import json
+import time
+from typing import Optional, Dict, List, Any
+from pathlib import Path
+import contextlib
+from pydantic import BaseModel, Field
+import sqlite3
+from concurrent.futures import ThreadPoolExecutor
-app = FastAPI()
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger("tex2pdf-service")
+
-@app.post("/tex2pdf")
+app = FastAPI(title="LaTeX to PDF Conversion Service")
-async def convert_to_pdf(zip_file: UploadFile = File(...)):
+
-    if zip_file.filename.endswith('.zip'):
+# Configuration
-        with tempfile.TemporaryDirectory() as tmpdirname:
+MAX_UPLOAD_SIZE = int(os.environ.get("MAX_UPLOAD_SIZE", 50 * 1024 * 1024))  # Default: 50 MB
-            # Unpack the zip file
+API_KEY_NAME = os.environ.get("API_KEY_NAME", "X-API-Key")
-            with zipfile.ZipFile(BytesIO(await zip_file.read())) as z:
+ALLOWED_API_KEYS = os.environ.get("ALLOWED_API_KEYS", "").split(",")
-                z.extractall(tmpdirname)
+MAX_COMPILATION_TIME = int(os.environ.get("MAX_COMPILATION_TIME", 240))  # Default: 240 seconds
+RATE_LIMIT_WINDOW = int(os.environ.get("RATE_LIMIT_WINDOW", 60))  # Default: 60 seconds
+MAX_REQUESTS_PER_WINDOW = int(os.environ.get("MAX_REQUESTS_PER_WINDOW", 10))  # Default: 10 requests
+JOB_EXPIRY = int(os.environ.get("JOB_EXPIRY", 3600))  # Default: 1 hour
+JOBS_DIR = os.environ.get("JOBS_DIR", "/app/jobs")
+DB_PATH = os.environ.get("DB_PATH", "/app/db/jobs.db")
+API_KEY_REQUIRED = len(ALLOWED_API_KEYS) > 0
+if API_KEY_REQUIRED:
+    API_KEY_REQUIRED = os.environ.get("API_KEY_REQUIRED", "true").lower() in ("true", "1", "yes")
+VERSION=open("VERSION").read().strip()
+
+# Create necessary directories
+os.makedirs(JOBS_DIR, exist_ok=True)
+os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
+
+# Initialize SQLite database
+def init_db():
+    with sqlite3.connect(DB_PATH) as conn:
+        conn.execute('''
+            CREATE TABLE IF NOT EXISTS jobs (
+                id TEXT PRIMARY KEY,
+                status TEXT NOT NULL,
+                created_at REAL NOT NULL,
+                work_dir TEXT,
+                api_key TEXT,
+                options TEXT,
+                error TEXT,
+                progress TEXT,
+                updated_at REAL NOT NULL
+            )
+        ''')
+        # Add index for faster lookups
+        conn.execute('CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)')
+        conn.execute('CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs(created_at)')
+
+# Thread pool for database operations
+executor = ThreadPoolExecutor(max_workers=4)
+
+# In-memory rate limiting
+rate_limits: Dict[str, List[float]] = {}
+class ConversionOptions(BaseModel):
+    main_file: str = Field(default="main.tex", description="Main LaTeX file to compile")
+    num_runs: int = Field(default=2, ge=1, le=5, description="Number of compilation runs")
+    use_bibtex: bool = Field(default=False, description="Run BibTeX for bibliography")
+
+def verify_api_key(request: Request):
+    # If API keys are not required, skip validation
+    if not API_KEY_REQUIRED:
+        return "no_auth"
+
-            # Change working directory to tmpdirname
+    api_key = request.headers.get(API_KEY_NAME)
-            os.chdir(tmpdirname)
+
+    # Check if API key is provided and valid
+    if not api_key:
+        logger.warning("Missing API key in request")
+        raise HTTPException(
+            status_code=401,
+            detail="API key required",
+        )
+
+    if not ALLOWED_API_KEYS or api_key not in ALLOWED_API_KEYS:
+        logger.warning(f"Unauthorized access attempt with API key: {api_key[:5]}...")
+        raise HTTPException(
+            status_code=401,
+            detail="Invalid API key",
+        )
+
+    return api_key
+
+def check_rate_limit(request: Request, api_key: str = Depends(verify_api_key)):
+    client_id = api_key or request.client.host
+    current_time = time.time()
+
+    if client_id not in rate_limits:
+        rate_limits[client_id] = []
+
+    # Remove timestamps outside the window
+    rate_limits[client_id] = [t for t in rate_limits[client_id] if current_time - t < RATE_LIMIT_WINDOW]
+
+    if len(rate_limits[client_id]) >= MAX_REQUESTS_PER_WINDOW:
+        logger.warning(f"Rate limit exceeded for {client_id[:5]}...")
+        raise HTTPException(
+            status_code=429,
+            detail=f"Rate limit exceeded. Maximum {MAX_REQUESTS_PER_WINDOW} requests per {RATE_LIMIT_WINDOW} seconds.",
+        )
+
+    rate_limits[client_id].append(current_time)
+    return client_id
+
+def validate_latex_filename(filename: str) -> bool:
+    """Validate if the filename follows safe LaTeX filename conventions."""
+    return bool(re.match(r'^[a-zA-Z0-9_\-\.]+\.tex$', filename))
+def sanitize_zip_archive(zip_file_obj, extract_path):
+    """Extracts zip contents safely, preventing directory traversal attacks."""
+    try:
+        with zipfile.ZipFile(zip_file_obj) as zip_ref:
+            # Log zip contents for debugging
+            logger.info(f"ZIP contents: {zip_ref.namelist()}")
+
-            # Find the main LaTeX file (assuming a convention, e.g., main.tex)
+            # First, check for suspicious paths
-            main_tex_file = 'main.tex'
+            for file_info in zip_ref.infolist():
-            main_tex_path = os.path.join(tmpdirname, main_tex_file)
+                # Convert to Path for safer path handling
-            if not os.path.exists(main_tex_path):
+                file_path = Path(file_info.filename)
-                raise HTTPException(status_code=400, detail="Main LaTeX file (main.tex) not found in the zip.")
+
+                # Check for absolute paths or directory traversal attempts
+                if file_path.is_absolute() or '..' in file_path.parts:
+                    raise ValueError(f"Suspicious path detected: {file_info.filename}")
+
+                # Check for extremely large files
+                if file_info.file_size > MAX_UPLOAD_SIZE:
+                    raise ValueError(f"File too large: {file_info.filename}")
+
+            # If all files pass validation, extract them
+            for file_info in zip_ref.infolist():
+                # Skip directories
+                if file_info.filename.endswith('/'):
+                    continue
+
+                # Create a safe extraction path
+                target_path = Path(extract_path) / file_info.filename
+
+                # Create parent directories if they don't exist
+                target_path.parent.mkdir(parents=True, exist_ok=True)
+
+                # Extract the file
+                with zip_ref.open(file_info) as source, open(target_path, 'wb') as target:
+                    shutil.copyfileobj(source, target)
+
+            # List extracted files for debugging
+            extracted_files = list(Path(extract_path).glob('**/*'))
+            logger.info(f"Extracted files: {[str(f.relative_to(extract_path)) for f in extracted_files]}")
+
+        return True
+    except zipfile.BadZipFile:
+        raise ValueError("Invalid ZIP file format")
+    except Exception as e:
+        logger.error(f"Error during ZIP extraction: {str(e)}", exc_info=True)
+        raise ValueError(f"Error extracting ZIP: {str(e)}")
-            # Compile the LaTeX document
+@contextlib.contextmanager
-            cmd = ['pdflatex', '-interaction=nonstopmode', '-output-directory', tmpdirname, main_tex_path]
+def working_directory(path):
-            process = await asyncio.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
+    """Changes working directory within the context and reverts back afterwards."""
-            try:
+    origin = os.getcwd()
-                print(f"Running pdflatex on {main_tex_path}")
+    try:
-                stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=120)
+        os.chdir(path)
-                # print(f"pdflatex output: {stdout.decode()}")
+        yield
-                # print(f"pdflatex errors: {stderr.decode()}")
+    finally:
-            except asyncio.TimeoutError:
+        os.chdir(origin)
-                return {"error": "Conversion timed out."}
+
+
+async def run_latex_command(cmd, timeout=MAX_COMPILATION_TIME):
+    """Run a LaTeX-related command with proper timeout and error handling."""
+    logger.info(f"Running command: {' '.join(cmd)}")
+
+    process = await asyncio.create_subprocess_exec(
+        *cmd,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE
+    )
+
+    try:
+        stdout, stderr = await asyncio.wait_for(
+            process.communicate(),
+            timeout=timeout
+        )
+
+        stdout_text = stdout.decode('utf-8', errors='replace')
+        stderr_text = stderr.decode('utf-8', errors='replace')
+
+        logger.info(f"Command returned with code {process.returncode}")
+        if process.returncode != 0:
+            logger.warning(f"Command failed with stderr: {stderr_text[:500]}...")
+
+        return {
+            "returncode": process.returncode,
+            "stdout": stdout_text,
+            "stderr": stderr_text
+        }
+    except asyncio.TimeoutError:
+        # Try to terminate the process
+        logger.error(f"Command timed out after {timeout} seconds: {' '.join(cmd)}")
+        process.terminate()
+        try:
+            await asyncio.wait_for(process.wait(), timeout=5)
+        except asyncio.TimeoutError:
+            # If it doesn't terminate, force kill
+            process.kill()
+
+        raise TimeoutError(f"Command timed out after {timeout} seconds: {' '.join(cmd)}")
-            if process.returncode != 0:
+# Database operations
-                # Compilation failed
+def store_job(job_id: str, job_data: Dict[str, Any]):
-                return {
+    """Store job data in SQLite database"""
-                    "error": "Conversion failed.",
+    current_time = time.time()
-                    "details": {
+
-                        "stderr": stderr.decode(),
+    # Extract fields from job_data
-                        "stdout": stdout.decode(),
+    status = job_data.get("status", "unknown")
-                    },
+    created_at = job_data.get("created_at", current_time)
-                }
+    work_dir = job_data.get("work_dir", "")
+    api_key = job_data.get("api_key", "")
+    options = json.dumps(job_data.get("options", {}))
+    error = job_data.get("error", "")
+    progress = job_data.get("progress", "")
+
+    with sqlite3.connect(DB_PATH) as conn:
+        conn.execute(
+            '''
+            INSERT OR REPLACE INTO jobs
+            (id, status, created_at, work_dir, api_key, options, error, progress, updated_at)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+            ''',
+            (job_id, status, created_at, work_dir, api_key, options, error, progress, current_time)
+        )
+        conn.commit()
+
-            # Assuming the output PDF has the same base name as the main LaTeX file
+def get_job(job_id: str) -> Optional[Dict[str, Any]]:
-            output_pdf_path = os.path.join(tmpdirname, 'main.pdf')
+    """Retrieve job data from SQLite database"""
-            if os.path.exists(output_pdf_path):
+    with sqlite3.connect(DB_PATH) as conn:
-                with open(output_pdf_path, 'rb') as f:
+        conn.row_factory = sqlite3.Row
-                    pdf_content = f.read()
+        cursor = conn.execute('SELECT * FROM jobs WHERE id = ?', (job_id,))
-                return StreamingResponse(BytesIO(pdf_content), media_type='application/pdf')
+        row = cursor.fetchone()
-            else:
-                return {"error": "PDF file not generated."}
+
+        if row:
-    else:
+            job_data = dict(row)
-        raise HTTPException(status_code=400, detail="Uploaded file is not a zip file.")
+            # Parse options back to dict
+            if job_data.get('options'):
+                job_data['options'] = json.loads(job_data['options'])
+            return job_data
+    return None
+
+def update_job(job_id: str, updates: Dict[str, Any]):
+    """Update specific fields in the job data"""
+    current_time = time.time()
+
+    # Start with SET updated_at=?
+    set_values = ["updated_at=?"]
+    params = [current_time]
+
+    # Add each update field
+    for key, value in updates.items():
+        if key == 'options':
+            value = json.dumps(value)
+        set_values.append(f"{key}=?")
+        params.append(value)
+
+    # Add job_id as the last parameter
+    params.append(job_id)
+
+    with sqlite3.connect(DB_PATH) as conn:
+        query = f"UPDATE jobs SET {', '.join(set_values)} WHERE id = ?"
+        conn.execute(query, params)
+        conn.commit()
+
+def get_pdf_path(job_id: str) -> str:
+    """Get the path where the PDF should be stored"""
+    return os.path.join(JOBS_DIR, f"{job_id}.pdf")
+
+def store_pdf(job_id: str, pdf_content: bytes):
+    """Store PDF in the filesystem"""
+    pdf_path = get_pdf_path(job_id)
+    os.makedirs(os.path.dirname(pdf_path), exist_ok=True)
+
+    with open(pdf_path, 'wb') as f:
+        f.write(pdf_content)
+
+def get_pdf(job_id: str) -> Optional[bytes]:
+    """Retrieve PDF from the filesystem"""
+    pdf_path = get_pdf_path(job_id)
+    if os.path.exists(pdf_path):
+        with open(pdf_path, 'rb') as f:
+            return f.read()
+    return None
+async def compile_latex(
+    job_id: str,
+    work_dir: str,
+    main_file: str,
+    num_runs: int,
+    use_bibtex: bool
+):
+    """Compile LaTeX document with proper error handling and multiple runs if needed."""
+    results = []
+    main_tex_path = os.path.join(work_dir, main_file)
+
+    # Verify the main file exists
+    if not os.path.exists(main_tex_path):
+        logger.error(f"Main LaTeX file not found: {main_tex_path}")
+        update_job(job_id, {
+            "status": "failed",
+            "error": f"Main LaTeX file ({main_file}) not found in the archive."
+        })
+        return False
+
+    # List directory contents for debugging
+    logger.info(f"Work directory contents: {os.listdir(work_dir)}")
+
+    try:
+        with working_directory(work_dir):
+            # Run pdflatex multiple times as needed
+            for i in range(num_runs):
+                update_job(job_id, {
+                    "status": "processing",
+                    "progress": f"LaTeX compilation {i+1}/{num_runs}"
+                })
+
+                # For verbose output to diagnose issues
+                cmd = [
+                    'pdflatex',
+                    '-interaction=nonstopmode',
+                    '-file-line-error',
+                    main_file
+                ]
+
+                try:
+                    result = await run_latex_command(cmd)
+                    results.append(result)
+
+                    # If compilation failed, stop and provide details
+                    if result["returncode"] != 0:
+                        # Extract relevant error messages
+                        error_lines = []
+                        for line in result["stdout"].split('\n'):
+                            if ":" in line and ("Error" in line or "Fatal" in line):
+                                error_lines.append(line)
+
+                        error_message = "LaTeX compilation failed"
+                        if error_lines:
+                            error_message = f"LaTeX errors: {' | '.join(error_lines[:3])}"
+
+                        update_job(job_id, {
+                            "status": "failed",
+                            "error": error_message,
+                            "details": json.dumps(result)
+                        })
+                        return False
+
+                    # Run bibtex if requested (after the first pdflatex run)
+                    if use_bibtex and i == 0:
+                        update_job(job_id, {
+                            "status": "processing",
+                            "progress": "Running BibTeX"
+                        })
+
+                        basename = os.path.splitext(main_file)[0]
+                        bibtex_cmd = ['bibtex', basename]
+
+                        bibtex_result = await run_latex_command(bibtex_cmd)
+                        results.append(bibtex_result)
+
+                except TimeoutError as e:
+                    logger.error(f"Timeout during compilation: {str(e)}")
+                    update_job(job_id, {
+                        "status": "failed",
+                        "error": str(e)
+                    })
+                    return False
+                except Exception as e:
+                    logger.error(f"Unexpected error during compilation: {str(e)}", exc_info=True)
+                    update_job(job_id, {
+                        "status": "failed",
+                        "error": f"Unexpected error: {str(e)}"
+                    })
+                    return False
+
+            # Check if the PDF was generated
+            pdf_basename = os.path.splitext(main_file)[0]
+            pdf_path = os.path.join(work_dir, f"{pdf_basename}.pdf")
+
+            if not os.path.exists(pdf_path):
+                logger.error(f"PDF not generated at expected path: {pdf_path}")
+                update_job(job_id, {
+                    "status": "failed",
+                    "error": "PDF file not generated despite successful compilation"
+                })
+                return False
+
+            # Store the PDF in the filesystem
+            with open(pdf_path, 'rb') as f:
+                pdf_content = f.read()
+            store_pdf(job_id, pdf_content)
+
+            # Update job status
+            update_job(job_id, {
+                "status": "completed",
+            })
+            return True
+
+    except Exception as e:
+        logger.error(f"Exception in compile_latex: {str(e)}", exc_info=True)
+        update_job(job_id, {
+            "status": "failed",
+            "error": f"Unexpected error: {str(e)}"
+        })
+        return False
+# Clean up old jobs (runs in background)
+async def cleanup_old_jobs():
+    """Clean up old jobs and their resources"""
+    while True:
+        try:
+            current_time = time.time()
+            expiry_time = current_time - JOB_EXPIRY
+
+            # Get expired jobs
+            with sqlite3.connect(DB_PATH) as conn:
+                conn.row_factory = sqlite3.Row
+                cursor = conn.execute('SELECT id, work_dir FROM jobs WHERE created_at < ?', (expiry_time,))
+                expired_jobs = cursor.fetchall()
+
+            for job in expired_jobs:
+                job_id = job['id']
+                work_dir = job['work_dir']
+
+                # Clean up PDF if it exists
+                pdf_path = get_pdf_path(job_id)
+                if os.path.exists(pdf_path):
+                    os.remove(pdf_path)
+
+                # Clean up work directory if it exists
+                if work_dir and os.path.exists(work_dir):
+                    shutil.rmtree(work_dir, ignore_errors=True)
+
+                # Remove job from database
+                with sqlite3.connect(DB_PATH) as conn:
+                    conn.execute('DELETE FROM jobs WHERE id = ?', (job_id,))
+                    conn.commit()
+
+                logger.info(f"Cleaned up expired job {job_id}")
+
+        except Exception as e:
+            logger.error(f"Error in cleanup task: {str(e)}", exc_info=True)
+
+        # Run cleanup every 15 minutes
+        await asyncio.sleep(900)
+@app.post("/tex2pdf",
+          dependencies=[Depends(check_rate_limit)],
+          summary="Convert LaTeX files to PDF",
+          response_description="Returns job ID for status checking")
+async def convert_to_pdf(
+    background_tasks: BackgroundTasks,
+    request: Request,
+    zip_file: UploadFile = File(...),
+    options: Optional[ConversionOptions] = None
+):
+    """
+    Takes a zip file containing LaTeX files and compiles them into a PDF.
+
+    - The zip file must contain all necessary files for compilation
+    - By default, assumes main.tex is the main file unless specified otherwise
+    - Returns a job ID that can be used to check status and retrieve the PDF
+    """
+    api_key = verify_api_key(request)
+    start_time = time.time()
+    job_id = str(uuid.uuid4())
+
+    if options is None:
+        options = ConversionOptions()
+
+    logger.info(f"Starting conversion job {job_id}")
+
+    # Validate input
+    if not zip_file.filename.endswith('.zip'):
+        logger.warning(f"Job {job_id}: Invalid file format: {zip_file.filename}")
+        raise HTTPException(
+            status_code=400,
+            detail="Uploaded file must be a zip archive."
+        )
+
+    if not validate_latex_filename(options.main_file):
+        logger.warning(f"Job {job_id}: Invalid main file name: {options.main_file}")
+        raise HTTPException(
+            status_code=400,
+            detail="Main file name must be a valid LaTeX filename (e.g., main.tex)"
+        )
+
+    # Create the job record
+    job_data = {
+        "id": job_id,
+        "status": "uploading",
+        "created_at": start_time,
+        "options": options.dict(),
+        "api_key": api_key,
+    }
+    store_job(job_id, job_data)
+
+    try:
+        # Create a temporary directory for this job
+        work_dir = tempfile.mkdtemp(prefix=f"tex2pdf_{job_id}_")
+        update_job(job_id, {
+            "status": "extracting",
+            "work_dir": work_dir
+        })
+
+        # Read zip file to memory
+        zip_content = await zip_file.read()
+        if len(zip_content) > MAX_UPLOAD_SIZE:
+            logger.warning(f"Job {job_id}: File too large: {len(zip_content)} bytes")
+            update_job(job_id, {
+                "status": "failed",
+                "error": f"File too large. Maximum size: {MAX_UPLOAD_SIZE/1024/1024} MB"
+            })
+            return {
+                "job_id": job_id,
+                "status": "failed",
+                "message": "File too large"
+            }
+
+        # Extract zip files safely
+        try:
+            sanitize_zip_archive(BytesIO(zip_content), work_dir)
+            update_job(job_id, {"status": "queued"})
+        except ValueError as e:
+            logger.warning(f"Job {job_id}: Zip extraction failed: {str(e)}")
+            update_job(job_id, {
+                "status": "failed",
+                "error": f"Zip extraction failed: {str(e)}"
+            })
+            return {
+                "job_id": job_id,
+                "status": "failed",
+                "message": str(e)
+            }
+
+        # Start compilation in background
+        background_tasks.add_task(
+            compile_latex,
+            job_id,
+            work_dir,
+            options.main_file,
+            options.num_runs,
+            options.use_bibtex
+        )
+
+        return {
+            "job_id": job_id,
+            "status": "processing",
+            "message": "Conversion job started"
+        }
+
+    except Exception as e:
+        logger.error(f"Job {job_id}: Unexpected error: {str(e)}", exc_info=True)
+        update_job(job_id, {
+            "status": "failed",
+            "error": f"Unexpected error: {str(e)}"
+        })
+        return {
+            "job_id": job_id,
+            "status": "failed",
+            "message": "Server error"
+        }
+@app.get("/tex2pdf/status/{job_id}",
+         dependencies=[Depends(verify_api_key)],
+         summary="Check the status of a conversion job")
+async def check_job_status(job_id: str):
+    """Check the status of a previously submitted conversion job."""
+    job = get_job(job_id)
+    if not job:
+        raise HTTPException(
+            status_code=404,
+            detail="Job not found"
+        )
+
+    # Clean sensitive or internal information
+    response = {
+        "job_id": job_id,
+        "status": job["status"],
+        "created_at": job["created_at"],
+    }
+
+    # Add error details if failed
+    if job["status"] == "failed" and "error" in job:
+        response["error"] = job["error"]
+
+    # Add progress info if processing
+    if job["status"] == "processing" and "progress" in job:
+        response["progress"] = job["progress"]
+
+    return response
+@app.get("/tex2pdf/download/{job_id}",
+         dependencies=[Depends(verify_api_key)],
+         summary="Download the generated PDF")
+async def download_pdf(job_id: str):
+    """Download the PDF generated by a completed conversion job."""
+    job = get_job(job_id)
+    if not job:
+        raise HTTPException(
+            status_code=404,
+            detail="Job not found"
+        )
+
+    if job["status"] != "completed":
+        raise HTTPException(
+            status_code=400,
+            detail=f"PDF not ready. Current status: {job['status']}"
+        )
+
+    try:
+        # Option 1: Get PDF from memory and stream it
+        # pdf_content = get_pdf(job_id)
+        # if not pdf_content:
+        #     raise HTTPException(
+        #         status_code=404,
+        #         detail="PDF file not found in storage"
+        #     )
+        #
+        # # Generate a filename based on the job ID
+        # filename = f"document_{job_id[-6:]}.pdf"
+        #
+        # return StreamingResponse(
+        #     BytesIO(pdf_content),
+        #     media_type='application/pdf',
+        #     headers={"Content-Disposition": f"attachment; filename={filename}"}
+        # )
+
+        # Option 2: Use FileResponse for more efficient file serving
+        pdf_path = get_pdf_path(job_id)
+        if not os.path.exists(pdf_path):
+            raise HTTPException(
+                status_code=404,
+                detail="PDF file not found in storage"
+            )
+
+        filename = f"document_{job_id[-6:]}.pdf"
+
+        return FileResponse(
+            pdf_path,
+            media_type='application/pdf',
+            filename=filename
+        )
+    except Exception as e:
+        logger.error(f"Error delivering PDF for job {job_id}: {str(e)}", exc_info=True)
+        raise HTTPException(
+            status_code=500,
+            detail="Error retrieving PDF file"
+        )
+@app.get("/health", summary="Health check endpoint")
+async def health_check():
+    """Simple health check endpoint to verify the API is running."""
+    try:
+        # Check database connection
+        with sqlite3.connect(DB_PATH) as conn:
+            cursor = conn.execute("SELECT 1")
+            cursor.fetchone()
+        db_status = "connected"
+    except Exception as e:
+        db_status = f"error: {str(e)}"
+
+    return {
+        "status": "healthy",
+        "version": VERSION,
+        "database": db_status,
+        "storage": os.path.exists(JOBS_DIR) and os.access(JOBS_DIR, os.W_OK)
+    }
+@app.on_event("startup")
+async def startup_event():
+    logger.info("Service starting up")
+    # Initialize the database
+    init_db()
+    # Start background cleanup task
+    asyncio.create_task(cleanup_old_jobs())
+
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Clean up on shutdown"""
+    logger.info("Service shutting down")
+    executor.shutdown(wait=False)

requirements.txt | 5 (new file)
@@ -0,0 +1,5 @@
+fastapi==0.110.0
+uvicorn==0.27.1
+pydantic==2.5.3
+python-multipart==0.0.6
+aiofiles==23.2.1