Version 2.3.2 (Complete refactor)

parent dcb57e5a2b
commit 9963a25e4a

.dockerignore: 10 changed lines (Normal file)
@@ -0,0 +1,10 @@
.gitignore
.dockerignore
.git
.archive
.env
.env.*
logs
Dockerfile
README.md
docker-compose.yaml

Dockerfile: 80 changed lines
@@ -1,14 +1,72 @@
FROM archlinux:base

# Update and install necessary packages
RUN pacman -Sy --noconfirm curl reflector && \
    reflector --latest 5 --sort rate --save /etc/pacman.d/mirrorlist && \
    sed -i '/\[options\]/a XferCommand = /usr/bin/curl -C - --fail --retry 3 --retry-delay 3 -o %o %u' /etc/pacman.conf && \
    pacman -Syu --noconfirm --needed texlive-basic texlive-bibtexextra texlive-bin texlive-binextra texlive-context texlive-fontsrecommended texlive-fontsextra texlive-fontutils texlive-formatsextra texlive-langenglish texlive-langeuropean texlive-langfrench texlive-langgerman texlive-latex texlive-latexextra texlive-latexrecommended texlive-luatex texlive-mathscience texlive-metapost texlive-music texlive-pictures texlive-plaingeneric texlive-pstricks texlive-publishers && \
    pacman -Syu --noconfirm --needed python-fastapi uvicorn python-python-multipart && \
    yes | pacman -Scc
FROM python:3.11-slim AS builder

# Set working directory
WORKDIR /app
COPY main.py .

ENTRYPOINT ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
# Install build dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    build-essential \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip wheel --no-cache-dir --wheel-dir /app/wheels -r requirements.txt


# Create final image
FROM python:3.11-slim AS final

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app \
    PORT=8000 \
    MAX_WORKERS=4 \
    TZ=UTC \
    JOBS_DIR=/data/jobs \
    DB_PATH=/data/db/jobs.db

# Create a non-root user
RUN groupadd -r appuser && useradd -r -g appuser appuser

# Install LaTeX and required dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    texlive-full \
    tini \
    sqlite3 \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Create persistent directories
RUN mkdir -p /data/jobs /data/db /app/temp && \
    chown -R appuser:appuser /app /data

# Copy Python wheels from builder stage
COPY --from=builder /app/wheels /wheels

# Install Python dependencies
RUN pip install --no-cache-dir /wheels/* && rm -rf /wheels

# Copy application code
COPY . /app/

# Create entrypoint script that properly handles environment variables
RUN echo '#!/bin/sh\n\
exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers $MAX_WORKERS --log-level info\n\
' > /app/entrypoint.sh && chmod +x /app/entrypoint.sh

# Switch to non-root user
USER appuser

# Expose the service port
EXPOSE 8000

# Use tini as init
ENTRYPOINT ["/usr/bin/tini", "--"]

# Run the application with proper signal handling
CMD ["/app/entrypoint.sh"]

README.md: 219 changed lines
@@ -1,58 +1,213 @@
# LaTeX-to-PDF Conversion Service
# LaTeX to PDF Conversion Service

This service provides an API endpoint to convert LaTeX documents into PDF format. It supports `.zip` file uploads containing the LaTeX source file (`main.tex`) and any associated files (e.g., images or additional `.tex` files).
A high-performance, secure REST API for converting LaTeX documents to PDF format.

## Getting Started
## Features

### Building the Docker Image
- **Simple API**: Upload a ZIP file containing LaTeX documents and get a PDF back
- **Secure Processing**: Comprehensive security measures including input validation and sanitization
- **Multiple Workers**: Designed for concurrency with shared file system and SQLite database
- **Robust Error Handling**: Detailed error messages with LaTeX compilation logs
- **Automatic Cleanup**: Background process removes expired PDFs and temporary files
- **Configurable Options**: Multiple compilation runs, BibTeX support, custom main file name
- **API Key Authentication**: Optional security layer with configurable API keys
- **Rate Limiting**: Protection against API abuse
- **Resource Control**: Limits on file sizes and compilation time
- **Docker Ready**: Ready-to-use Docker and Docker Compose configurations

To build the Docker image for the conversion service, navigate to the project directory and run:
## Quick Start

The easiest way to run the service is with Docker Compose:

```bash
docker build -t rbehzadan/tex2pdf .
# Clone the repository
git clone https://github.com/yourusername/tex2pdf.git
cd tex2pdf

# Start the service
docker-compose up -d
```

### Running the Service
The service will be available at `http://localhost:8000`.

After building the image, you can start the service with the following command:
## API Usage

### Convert LaTeX to PDF

```bash
docker run -d -p 8000:8000 rbehzadan/tex2pdf
curl -X POST \
  -H "X-API-Key: 1234" \
  -F "zip_file=@my_latex_files.zip" \
  http://localhost:8000/tex2pdf
```

This command runs the Docker container in the background (`-d`) and maps port `8000` of the container to port `8000` on the host, making the service accessible at `http://localhost:8000`.
Response:
```json
{
  "job_id": "28f5bf9b-587f-4f3c-a3de-4d737d9736ce",
  "status": "processing",
  "message": "Conversion job started"
}
```

## API Endpoint

The service exposes a single POST endpoint at `/tex2pdf` for converting LaTeX to PDF.

### Uploading a `.zip` File

The `.zip` file should contain a `main.tex` file and can include additional resources such as images or other `.tex` files used by `main.tex`.

## Manual Testing

### Testing with `curl`

To test the conversion service with `curl`, use the following command:
### Check Job Status

```bash
curl -X POST -F "zip_file=@path/to/your/file.zip" http://localhost:8000/tex2pdf -o output.pdf
curl -X GET \
  -H "X-API-Key: 1234" \
  http://localhost:8000/tex2pdf/status/28f5bf9b-587f-4f3c-a3de-4d737d9736ce
```

Replace `path/to/your/file.zip` with the actual path to your `.zip` file. The resulting PDF will be saved as `output.pdf` in the current directory.
Response:
```json
{
  "job_id": "28f5bf9b-587f-4f3c-a3de-4d737d9736ce",
  "status": "completed",
  "created_at": 1741424390.6039968
}
```

### Testing with HTTPie

HTTPie offers a more user-friendly way to test the service. Use the following command:
### Download PDF

```bash
http -f POST http://localhost:8000/tex2pdf zip_file@path/to/your/file.zip > output.pdf
curl -X GET \
  -H "X-API-Key: 1234" \
  -o output.pdf \
  http://localhost:8000/tex2pdf/download/28f5bf9b-587f-4f3c-a3de-4d737d9736ce
```

As with `curl`, replace `path/to/your/file.zip` with the path to your `.zip` file. The output will be redirected to `output.pdf` in the current directory.
### Health Check

## Troubleshooting
```bash
curl http://localhost:8000/health
```

If you encounter any issues with the conversion process, ensure that your `.zip` file is structured correctly, with a `main.tex` file at the root. For more detailed error information, consult the service logs.
Response:
```json
{
  "status": "healthy",
  "version": "1.0.0",
  "database": "connected",
  "storage": true
}
```

## Advanced Usage

### Compilation Options

You can customize the LaTeX compilation process:

```bash
curl -X POST \
  -H "X-API-Key: 1234" \
  -F "zip_file=@my_latex_files.zip" \
  -F "options={\"main_file\": \"document.tex\", \"num_runs\": 3, \"use_bibtex\": true}" \
  http://localhost:8000/tex2pdf
```

Options (a Python client sketch follows this list):
- `main_file`: Main LaTeX file to compile (default: `main.tex`)
- `num_runs`: Number of compilation runs (default: 2)
- `use_bibtex`: Run BibTeX for bibliography processing (default: false)
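
The same request can be issued from Python. The sketch below is illustrative only: it assumes the `requests` package is installed (it is not listed in `requirements.txt`), that the service runs locally on port 8000, and that `1234` is one of the configured API keys, as in the example `docker-compose.yaml`.

```python
# Illustrative client sketch, not an official client.
import json
import time

import requests  # assumed extra dependency: pip install requests

BASE_URL = "http://localhost:8000"   # assumed local deployment
HEADERS = {"X-API-Key": "1234"}      # example key from docker-compose.yaml

options = {"main_file": "document.tex", "num_runs": 3, "use_bibtex": True}

# Submit the job: multipart upload of the ZIP plus the JSON-encoded options field
with open("my_latex_files.zip", "rb") as f:
    submit = requests.post(
        f"{BASE_URL}/tex2pdf",
        headers=HEADERS,
        files={"zip_file": ("my_latex_files.zip", f, "application/zip")},
        data={"options": json.dumps(options)},
    )
submit.raise_for_status()
job_id = submit.json()["job_id"]

# Poll the status endpoint until the job either completes or fails
while True:
    status = requests.get(f"{BASE_URL}/tex2pdf/status/{job_id}", headers=HEADERS).json()
    if status["status"] in ("completed", "failed"):
        break
    time.sleep(2)

# Download the PDF for a completed job
if status["status"] == "completed":
    pdf = requests.get(f"{BASE_URL}/tex2pdf/download/{job_id}", headers=HEADERS)
    with open("output.pdf", "wb") as out:
        out.write(pdf.content)
```

The status values seen while polling mirror the job states stored in the service's SQLite `jobs` table (`uploading`, `extracting`, `queued`, `processing`, `completed`, `failed`).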

## ZIP File Requirements

- The ZIP file must contain all necessary files for LaTeX compilation
- By default, the service looks for `main.tex` as the main file
- All referenced files (images, styles, etc.) should be included
- Paths in LaTeX files should be relative and match the ZIP structure (see the packaging sketch below)
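
As an illustration, a compliant archive can be built with Python's standard `zipfile` module. The directory and file names below are placeholders, not part of this repository.

```python
# Package a LaTeX project into a ZIP the service can compile (paths are examples).
import zipfile
from pathlib import Path

project = Path("my_paper")  # contains main.tex plus any images, .bib and .sty files

with zipfile.ZipFile("my_latex_files.zip", "w", zipfile.ZIP_DEFLATED) as archive:
    for path in project.rglob("*"):
        if path.is_file():
            # Store paths relative to the project root so main.tex ends up at the top level
            archive.write(path, arcname=path.relative_to(project))
```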

## Configuration

The service can be configured via environment variables in the docker-compose.yaml file:

| Variable | Description | Default |
|----------|-------------|---------|
| `ALLOWED_API_KEYS` | Comma-separated list of valid API keys | "" (empty = no auth) |
| `API_KEY_REQUIRED` | Enable/disable API key validation | "true" |
| `MAX_WORKERS` | Number of uvicorn workers | 2 |
| `MAX_UPLOAD_SIZE` | Maximum file upload size in bytes | 52428800 (50MB) |
| `MAX_COMPILATION_TIME` | Maximum LaTeX compilation time in seconds | 240 |
| `RATE_LIMIT_WINDOW` | Rate limiting window in seconds | 60 |
| `MAX_REQUESTS_PER_WINDOW` | Maximum requests per rate limit window | 10 |
| `JOB_EXPIRY` | Job expiry time in seconds | 3600 (1 hour) |
| `JOBS_DIR` | Directory for storing PDF files | "/data/jobs" |
| `DB_PATH` | Path to SQLite database | "/data/db/jobs.db" |

## Deployment

### System Requirements

- Docker and Docker Compose
- For running without Docker:
  - Python 3.10+
  - LaTeX distribution (texlive)
  - SQLite3

### Production Deployment Considerations

For production deployments, consider:

1. **Configure a reverse proxy** (like Nginx) with HTTPS
2. **Adjust resource limits** based on your workload
3. **Set strong API keys** and restrict access
4. **Mount persistent volumes** for job data
5. **Monitor disk usage** and adjust `JOB_EXPIRY` accordingly
6. **Set up logging** to a centralized logging service

## Architecture

The service uses a stateless design with background processing:

1. **FastAPI Application**: Handles HTTP requests and responses
2. **SQLite Database**: Stores job metadata and status
3. **File System**: Stores generated PDFs and temporary files
4. **Background Tasks**: Process LaTeX compilation asynchronously

## Development

### Local Development Setup

```bash
# Clone the repository
git clone https://github.com/yourusername/tex2pdf.git
cd tex2pdf

# Create a virtual environment
python -m venv venv
source venv/bin/activate  # On Windows: venv\Scripts\activate

# Install dependencies
pip install -r requirements.txt

# Run the service
uvicorn main:app --reload --host 0.0.0.0 --port 8000
```

### Running Tests

A minimal example test module is sketched after the command below.

```bash
pytest tests/
```
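
This commit does not include the `tests/` directory, so the sketch below is only one way such a test could look. It uses FastAPI's `TestClient` (which needs `httpx` installed) against the `/health` endpoint. Because `main.py` reads the `VERSION` file and creates `JOBS_DIR`/`DB_PATH` at import time, run it from the repository root and point those variables at a writable location first.

```python
# tests/test_health.py (hypothetical file name, not part of this commit)
import os

# Redirect the data paths before importing main.py, which creates them on import
os.environ.setdefault("JOBS_DIR", "/tmp/tex2pdf-test/jobs")
os.environ.setdefault("DB_PATH", "/tmp/tex2pdf-test/db/jobs.db")

from fastapi.testclient import TestClient  # requires httpx

from main import app

client = TestClient(app)


def test_health_reports_healthy():
    response = client.get("/health")
    assert response.status_code == 200
    assert response.json()["status"] == "healthy"
```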

## License

[MIT License](LICENSE)

## Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

## Security Considerations

This service implements several security measures:

- API key authentication
- Input validation
- Rate limiting
- Safe ZIP extraction
- Process isolation

Even so, be aware that allowing users to run LaTeX compilation on your server carries inherent risks. Always deploy behind a secure gateway in production environments.

docker-compose.yaml: 21 changed lines (Normal file)
@@ -0,0 +1,21 @@
services:
  app:
    image: rbehzadan/tex2pdf
    container_name: tex2pdf
    ports:
      - "8000:8000"
    environment:
      - ALLOWED_API_KEYS=1234,5678,abcd  # Comma-separated list of allowed API keys
      - API_KEY_REQUIRED=true            # Set to "false" to disable API key validation
      - MAX_WORKERS=4                    # Number of uvicorn workers
      - MAX_UPLOAD_SIZE=52428800         # 50MB in bytes
      - MAX_COMPILATION_TIME=240         # Maximum LaTeX compilation time in seconds
      - RATE_LIMIT_WINDOW=60             # Rate limiting window in seconds
      - MAX_REQUESTS_PER_WINDOW=10       # Maximum requests per rate limit window
      - JOB_EXPIRY=3600                  # Job expiry time in seconds (1 hour)
    volumes:
      - pdf_data:/data
    restart: unless-stopped

volumes:
  pdf_data:

main.py: 738 changed lines
@@ -1,60 +1,706 @@
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import StreamingResponse
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends, BackgroundTasks, Request
from fastapi.responses import StreamingResponse, FileResponse
from io import BytesIO
import asyncio
import tempfile
import zipfile
import os
import logging
import shutil
import re
import uuid
import json
import time
from typing import Optional, Dict, List, Any
from pathlib import Path
import contextlib
from pydantic import BaseModel, Field
import sqlite3
from concurrent.futures import ThreadPoolExecutor

app = FastAPI()
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger("tex2pdf-service")

@app.post("/tex2pdf")
async def convert_to_pdf(zip_file: UploadFile = File(...)):
    if zip_file.filename.endswith('.zip'):
        with tempfile.TemporaryDirectory() as tmpdirname:
            # Unpack the zip file
            with zipfile.ZipFile(BytesIO(await zip_file.read())) as z:
                z.extractall(tmpdirname)
app = FastAPI(title="LaTeX to PDF Conversion Service")

# Configuration
MAX_UPLOAD_SIZE = int(os.environ.get("MAX_UPLOAD_SIZE", 50 * 1024 * 1024))  # Default: 50 MB
API_KEY_NAME = os.environ.get("API_KEY_NAME", "X-API-Key")
ALLOWED_API_KEYS = os.environ.get("ALLOWED_API_KEYS", "").split(",")
MAX_COMPILATION_TIME = int(os.environ.get("MAX_COMPILATION_TIME", 240))  # Default: 240 seconds
RATE_LIMIT_WINDOW = int(os.environ.get("RATE_LIMIT_WINDOW", 60))  # Default: 60 seconds
MAX_REQUESTS_PER_WINDOW = int(os.environ.get("MAX_REQUESTS_PER_WINDOW", 10))  # Default: 10 requests
JOB_EXPIRY = int(os.environ.get("JOB_EXPIRY", 3600))  # Default: 1 hour
JOBS_DIR = os.environ.get("JOBS_DIR", "/app/jobs")
DB_PATH = os.environ.get("DB_PATH", "/app/db/jobs.db")
API_KEY_REQUIRED = len(ALLOWED_API_KEYS) > 0
if API_KEY_REQUIRED:
    API_KEY_REQUIRED = os.environ.get("API_KEY_REQUIRED", "true").lower() in ("true", "1", "yes")
VERSION = open("VERSION").read().strip()

# Create necessary directories
os.makedirs(JOBS_DIR, exist_ok=True)
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)

# Initialize SQLite database
def init_db():
    with sqlite3.connect(DB_PATH) as conn:
        conn.execute('''
        CREATE TABLE IF NOT EXISTS jobs (
            id TEXT PRIMARY KEY,
            status TEXT NOT NULL,
            created_at REAL NOT NULL,
            work_dir TEXT,
            api_key TEXT,
            options TEXT,
            error TEXT,
            progress TEXT,
            updated_at REAL NOT NULL
        )
        ''')
        # Add index for faster lookups
        conn.execute('CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status)')
        conn.execute('CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs(created_at)')

# Thread pool for database operations
executor = ThreadPoolExecutor(max_workers=4)

# In-memory rate limiting
rate_limits: Dict[str, List[float]] = {}

class ConversionOptions(BaseModel):
    main_file: str = Field(default="main.tex", description="Main LaTeX file to compile")
    num_runs: int = Field(default=2, ge=1, le=5, description="Number of compilation runs")
    use_bibtex: bool = Field(default=False, description="Run BibTeX for bibliography")

def verify_api_key(request: Request):
    # If API keys are not required, skip validation
    if not API_KEY_REQUIRED:
        return "no_auth"

            # Change working directory to tmpdirname
            os.chdir(tmpdirname)
    api_key = request.headers.get(API_KEY_NAME)

    # Check if API key is provided and valid
    if not api_key:
        logger.warning("Missing API key in request")
        raise HTTPException(
            status_code=401,
            detail="API key required",
        )

    if not ALLOWED_API_KEYS or api_key not in ALLOWED_API_KEYS:
        logger.warning(f"Unauthorized access attempt with API key: {api_key[:5]}...")
        raise HTTPException(
            status_code=401,
            detail="Invalid API key",
        )

    return api_key

def check_rate_limit(request: Request, api_key: str = Depends(verify_api_key)):
    client_id = api_key or request.client.host
    current_time = time.time()

    if client_id not in rate_limits:
        rate_limits[client_id] = []

    # Remove timestamps outside the window
    rate_limits[client_id] = [t for t in rate_limits[client_id] if current_time - t < RATE_LIMIT_WINDOW]

    if len(rate_limits[client_id]) >= MAX_REQUESTS_PER_WINDOW:
        logger.warning(f"Rate limit exceeded for {client_id[:5]}...")
        raise HTTPException(
            status_code=429,
            detail=f"Rate limit exceeded. Maximum {MAX_REQUESTS_PER_WINDOW} requests per {RATE_LIMIT_WINDOW} seconds.",
        )

    rate_limits[client_id].append(current_time)
    return client_id

def validate_latex_filename(filename: str) -> bool:
    """Validate if the filename follows safe LaTeX filename conventions."""
    return bool(re.match(r'^[a-zA-Z0-9_\-\.]+\.tex$', filename))

def sanitize_zip_archive(zip_file_obj, extract_path):
    """Extracts zip contents safely, preventing directory traversal attacks."""
    try:
        with zipfile.ZipFile(zip_file_obj) as zip_ref:
            # Log zip contents for debugging
            logger.info(f"ZIP contents: {zip_ref.namelist()}")

            # Find the main LaTeX file (assuming a convention, e.g., main.tex)
            main_tex_file = 'main.tex'
            main_tex_path = os.path.join(tmpdirname, main_tex_file)
            if not os.path.exists(main_tex_path):
                raise HTTPException(status_code=400, detail="Main LaTeX file (main.tex) not found in the zip.")
            # First, check for suspicious paths
            for file_info in zip_ref.infolist():
                # Convert to Path for safer path handling
                file_path = Path(file_info.filename)

                # Check for absolute paths or directory traversal attempts
                if file_path.is_absolute() or '..' in file_path.parts:
                    raise ValueError(f"Suspicious path detected: {file_info.filename}")

                # Check for extremely large files
                if file_info.file_size > MAX_UPLOAD_SIZE:
                    raise ValueError(f"File too large: {file_info.filename}")

            # If all files pass validation, extract them
            for file_info in zip_ref.infolist():
                # Skip directories
                if file_info.filename.endswith('/'):
                    continue

                # Create a safe extraction path
                target_path = Path(extract_path) / file_info.filename

                # Create parent directories if they don't exist
                target_path.parent.mkdir(parents=True, exist_ok=True)

                # Extract the file
                with zip_ref.open(file_info) as source, open(target_path, 'wb') as target:
                    shutil.copyfileobj(source, target)

            # List extracted files for debugging
            extracted_files = list(Path(extract_path).glob('**/*'))
            logger.info(f"Extracted files: {[str(f.relative_to(extract_path)) for f in extracted_files]}")

        return True
    except zipfile.BadZipFile:
        raise ValueError("Invalid ZIP file format")
    except Exception as e:
        logger.error(f"Error during ZIP extraction: {str(e)}", exc_info=True)
        raise ValueError(f"Error extracting ZIP: {str(e)}")

            # Compile the LaTeX document
            cmd = ['pdflatex', '-interaction=nonstopmode', '-output-directory', tmpdirname, main_tex_path]
            process = await asyncio.create_subprocess_exec(*cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
            try:
                print(f"Running pdflatex on {main_tex_path}")
                stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=120)
                # print(f"pdflatex output: {stdout.decode()}")
                # print(f"pdflatex errors: {stderr.decode()}")
            except asyncio.TimeoutError:
                return {"error": "Conversion timed out."}
@contextlib.contextmanager
def working_directory(path):
    """Changes working directory within the context and reverts back afterwards."""
    origin = os.getcwd()
    try:
        os.chdir(path)
        yield
    finally:
        os.chdir(origin)

async def run_latex_command(cmd, timeout=MAX_COMPILATION_TIME):
    """Run a LaTeX-related command with proper timeout and error handling."""
    logger.info(f"Running command: {' '.join(cmd)}")

    process = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE
    )

    try:
        stdout, stderr = await asyncio.wait_for(
            process.communicate(),
            timeout=timeout
        )

        stdout_text = stdout.decode('utf-8', errors='replace')
        stderr_text = stderr.decode('utf-8', errors='replace')

        logger.info(f"Command returned with code {process.returncode}")
        if process.returncode != 0:
            logger.warning(f"Command failed with stderr: {stderr_text[:500]}...")

        return {
            "returncode": process.returncode,
            "stdout": stdout_text,
            "stderr": stderr_text
        }
    except asyncio.TimeoutError:
        # Try to terminate the process
        logger.error(f"Command timed out after {timeout} seconds: {' '.join(cmd)}")
        process.terminate()
        try:
            await asyncio.wait_for(process.wait(), timeout=5)
        except asyncio.TimeoutError:
            # If it doesn't terminate, force kill
            process.kill()

        raise TimeoutError(f"Command timed out after {timeout} seconds: {' '.join(cmd)}")

            if process.returncode != 0:
                # Compilation failed
                return {
                    "error": "Conversion failed.",
                    "details": {
                        "stderr": stderr.decode(),
                        "stdout": stdout.decode(),
                    },
                }
# Database operations
def store_job(job_id: str, job_data: Dict[str, Any]):
    """Store job data in SQLite database"""
    current_time = time.time()

    # Extract fields from job_data
    status = job_data.get("status", "unknown")
    created_at = job_data.get("created_at", current_time)
    work_dir = job_data.get("work_dir", "")
    api_key = job_data.get("api_key", "")
    options = json.dumps(job_data.get("options", {}))
    error = job_data.get("error", "")
    progress = job_data.get("progress", "")

    with sqlite3.connect(DB_PATH) as conn:
        conn.execute(
            '''
            INSERT OR REPLACE INTO jobs
            (id, status, created_at, work_dir, api_key, options, error, progress, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''',
            (job_id, status, created_at, work_dir, api_key, options, error, progress, current_time)
        )
        conn.commit()

            # Assuming the output PDF has the same base name as the main LaTeX file
            output_pdf_path = os.path.join(tmpdirname, 'main.pdf')
            if os.path.exists(output_pdf_path):
                with open(output_pdf_path, 'rb') as f:
                    pdf_content = f.read()
                return StreamingResponse(BytesIO(pdf_content), media_type='application/pdf')
            else:
                return {"error": "PDF file not generated."}
    else:
        raise HTTPException(status_code=400, detail="Uploaded file is not a zip file.")
def get_job(job_id: str) -> Optional[Dict[str, Any]]:
    """Retrieve job data from SQLite database"""
    with sqlite3.connect(DB_PATH) as conn:
        conn.row_factory = sqlite3.Row
        cursor = conn.execute('SELECT * FROM jobs WHERE id = ?', (job_id,))
        row = cursor.fetchone()

    if row:
        job_data = dict(row)
        # Parse options back to dict
        if job_data.get('options'):
            job_data['options'] = json.loads(job_data['options'])
        return job_data
    return None

def update_job(job_id: str, updates: Dict[str, Any]):
    """Update specific fields in the job data"""
    current_time = time.time()

    # Start with SET updated_at=?
    set_values = ["updated_at=?"]
    params = [current_time]

    # Add each update field
    for key, value in updates.items():
        if key == 'options':
            value = json.dumps(value)
        set_values.append(f"{key}=?")
        params.append(value)

    # Add job_id as the last parameter
    params.append(job_id)

    with sqlite3.connect(DB_PATH) as conn:
        query = f"UPDATE jobs SET {', '.join(set_values)} WHERE id = ?"
        conn.execute(query, params)
        conn.commit()

def get_pdf_path(job_id: str) -> str:
    """Get the path where the PDF should be stored"""
    return os.path.join(JOBS_DIR, f"{job_id}.pdf")

def store_pdf(job_id: str, pdf_content: bytes):
    """Store PDF in the filesystem"""
    pdf_path = get_pdf_path(job_id)
    os.makedirs(os.path.dirname(pdf_path), exist_ok=True)

    with open(pdf_path, 'wb') as f:
        f.write(pdf_content)

def get_pdf(job_id: str) -> Optional[bytes]:
    """Retrieve PDF from the filesystem"""
    pdf_path = get_pdf_path(job_id)
    if os.path.exists(pdf_path):
        with open(pdf_path, 'rb') as f:
            return f.read()
    return None

async def compile_latex(
    job_id: str,
    work_dir: str,
    main_file: str,
    num_runs: int,
    use_bibtex: bool
):
    """Compile LaTeX document with proper error handling and multiple runs if needed."""
    results = []
    main_tex_path = os.path.join(work_dir, main_file)

    # Verify the main file exists
    if not os.path.exists(main_tex_path):
        logger.error(f"Main LaTeX file not found: {main_tex_path}")
        update_job(job_id, {
            "status": "failed",
            "error": f"Main LaTeX file ({main_file}) not found in the archive."
        })
        return False

    # List directory contents for debugging
    logger.info(f"Work directory contents: {os.listdir(work_dir)}")

    try:
        with working_directory(work_dir):
            # Run pdflatex multiple times as needed
            for i in range(num_runs):
                update_job(job_id, {
                    "status": "processing",
                    "progress": f"LaTeX compilation {i+1}/{num_runs}"
                })

                # For verbose output to diagnose issues
                cmd = [
                    'pdflatex',
                    '-interaction=nonstopmode',
                    '-file-line-error',
                    main_file
                ]

                try:
                    result = await run_latex_command(cmd)
                    results.append(result)

                    # If compilation failed, stop and provide details
                    if result["returncode"] != 0:
                        # Extract relevant error messages
                        error_lines = []
                        for line in result["stdout"].split('\n'):
                            if ":" in line and ("Error" in line or "Fatal" in line):
                                error_lines.append(line)

                        error_message = "LaTeX compilation failed"
                        if error_lines:
                            error_message = f"LaTeX errors: {' | '.join(error_lines[:3])}"

                        update_job(job_id, {
                            "status": "failed",
                            "error": error_message,
                            "details": json.dumps(result)
                        })
                        return False

                    # Run bibtex if requested (after the first pdflatex run)
                    if use_bibtex and i == 0:
                        update_job(job_id, {
                            "status": "processing",
                            "progress": "Running BibTeX"
                        })

                        basename = os.path.splitext(main_file)[0]
                        bibtex_cmd = ['bibtex', basename]

                        bibtex_result = await run_latex_command(bibtex_cmd)
                        results.append(bibtex_result)

                except TimeoutError as e:
                    logger.error(f"Timeout during compilation: {str(e)}")
                    update_job(job_id, {
                        "status": "failed",
                        "error": str(e)
                    })
                    return False
                except Exception as e:
                    logger.error(f"Unexpected error during compilation: {str(e)}", exc_info=True)
                    update_job(job_id, {
                        "status": "failed",
                        "error": f"Unexpected error: {str(e)}"
                    })
                    return False

            # Check if the PDF was generated
            pdf_basename = os.path.splitext(main_file)[0]
            pdf_path = os.path.join(work_dir, f"{pdf_basename}.pdf")

            if not os.path.exists(pdf_path):
                logger.error(f"PDF not generated at expected path: {pdf_path}")
                update_job(job_id, {
                    "status": "failed",
                    "error": "PDF file not generated despite successful compilation"
                })
                return False

            # Store the PDF in the filesystem
            with open(pdf_path, 'rb') as f:
                pdf_content = f.read()
                store_pdf(job_id, pdf_content)

            # Update job status
            update_job(job_id, {
                "status": "completed",
            })
            return True

    except Exception as e:
        logger.error(f"Exception in compile_latex: {str(e)}", exc_info=True)
        update_job(job_id, {
            "status": "failed",
            "error": f"Unexpected error: {str(e)}"
        })
        return False

# Clean up old jobs (runs in background)
async def cleanup_old_jobs():
    """Clean up old jobs and their resources"""
    while True:
        try:
            current_time = time.time()
            expiry_time = current_time - JOB_EXPIRY

            # Get expired jobs
            with sqlite3.connect(DB_PATH) as conn:
                conn.row_factory = sqlite3.Row
                cursor = conn.execute('SELECT id, work_dir FROM jobs WHERE created_at < ?', (expiry_time,))
                expired_jobs = cursor.fetchall()

            for job in expired_jobs:
                job_id = job['id']
                work_dir = job['work_dir']

                # Clean up PDF if it exists
                pdf_path = get_pdf_path(job_id)
                if os.path.exists(pdf_path):
                    os.remove(pdf_path)

                # Clean up work directory if it exists
                if work_dir and os.path.exists(work_dir):
                    shutil.rmtree(work_dir, ignore_errors=True)

                # Remove job from database
                with sqlite3.connect(DB_PATH) as conn:
                    conn.execute('DELETE FROM jobs WHERE id = ?', (job_id,))
                    conn.commit()

                logger.info(f"Cleaned up expired job {job_id}")

        except Exception as e:
            logger.error(f"Error in cleanup task: {str(e)}", exc_info=True)

        # Run cleanup every 15 minutes
        await asyncio.sleep(900)

@app.post("/tex2pdf",
          dependencies=[Depends(check_rate_limit)],
          summary="Convert LaTeX files to PDF",
          response_description="Returns job ID for status checking")
async def convert_to_pdf(
    background_tasks: BackgroundTasks,
    request: Request,
    zip_file: UploadFile = File(...),
    options: Optional[ConversionOptions] = None
):
    """
    Takes a zip file containing LaTeX files and compiles them into a PDF.

    - The zip file must contain all necessary files for compilation
    - By default, assumes main.tex is the main file unless specified otherwise
    - Returns a job ID that can be used to check status and retrieve the PDF
    """
    api_key = verify_api_key(request)
    start_time = time.time()
    job_id = str(uuid.uuid4())

    if options is None:
        options = ConversionOptions()

    logger.info(f"Starting conversion job {job_id}")

    # Validate input
    if not zip_file.filename.endswith('.zip'):
        logger.warning(f"Job {job_id}: Invalid file format: {zip_file.filename}")
        raise HTTPException(
            status_code=400,
            detail="Uploaded file must be a zip archive."
        )

    if not validate_latex_filename(options.main_file):
        logger.warning(f"Job {job_id}: Invalid main file name: {options.main_file}")
        raise HTTPException(
            status_code=400,
            detail="Main file name must be a valid LaTeX filename (e.g., main.tex)"
        )

    # Create the job record
    job_data = {
        "id": job_id,
        "status": "uploading",
        "created_at": start_time,
        "options": options.dict(),
        "api_key": api_key,
    }
    store_job(job_id, job_data)

    try:
        # Create a temporary directory for this job
        work_dir = tempfile.mkdtemp(prefix=f"tex2pdf_{job_id}_")
        update_job(job_id, {
            "status": "extracting",
            "work_dir": work_dir
        })

        # Read zip file to memory
        zip_content = await zip_file.read()
        if len(zip_content) > MAX_UPLOAD_SIZE:
            logger.warning(f"Job {job_id}: File too large: {len(zip_content)} bytes")
            update_job(job_id, {
                "status": "failed",
                "error": f"File too large. Maximum size: {MAX_UPLOAD_SIZE/1024/1024} MB"
            })
            return {
                "job_id": job_id,
                "status": "failed",
                "message": "File too large"
            }

        # Extract zip files safely
        try:
            sanitize_zip_archive(BytesIO(zip_content), work_dir)
            update_job(job_id, {"status": "queued"})
        except ValueError as e:
            logger.warning(f"Job {job_id}: Zip extraction failed: {str(e)}")
            update_job(job_id, {
                "status": "failed",
                "error": f"Zip extraction failed: {str(e)}"
            })
            return {
                "job_id": job_id,
                "status": "failed",
                "message": str(e)
            }

        # Start compilation in background
        background_tasks.add_task(
            compile_latex,
            job_id,
            work_dir,
            options.main_file,
            options.num_runs,
            options.use_bibtex
        )

        return {
            "job_id": job_id,
            "status": "processing",
            "message": "Conversion job started"
        }

    except Exception as e:
        logger.error(f"Job {job_id}: Unexpected error: {str(e)}", exc_info=True)
        update_job(job_id, {
            "status": "failed",
            "error": f"Unexpected error: {str(e)}"
        })
        return {
            "job_id": job_id,
            "status": "failed",
            "message": "Server error"
        }

@app.get("/tex2pdf/status/{job_id}",
         dependencies=[Depends(verify_api_key)],
         summary="Check the status of a conversion job")
async def check_job_status(job_id: str):
    """Check the status of a previously submitted conversion job."""
    job = get_job(job_id)
    if not job:
        raise HTTPException(
            status_code=404,
            detail="Job not found"
        )

    # Clean sensitive or internal information
    response = {
        "job_id": job_id,
        "status": job["status"],
        "created_at": job["created_at"],
    }

    # Add error details if failed
    if job["status"] == "failed" and "error" in job:
        response["error"] = job["error"]

    # Add progress info if processing
    if job["status"] == "processing" and "progress" in job:
        response["progress"] = job["progress"]

    return response

@app.get("/tex2pdf/download/{job_id}",
         dependencies=[Depends(verify_api_key)],
         summary="Download the generated PDF")
async def download_pdf(job_id: str):
    """Download the PDF generated by a completed conversion job."""
    job = get_job(job_id)
    if not job:
        raise HTTPException(
            status_code=404,
            detail="Job not found"
        )

    if job["status"] != "completed":
        raise HTTPException(
            status_code=400,
            detail=f"PDF not ready. Current status: {job['status']}"
        )

    try:
        # Option 1: Get PDF from memory and stream it
        # pdf_content = get_pdf(job_id)
        # if not pdf_content:
        #     raise HTTPException(
        #         status_code=404,
        #         detail="PDF file not found in storage"
        #     )
        #
        # # Generate a filename based on the job ID
        # filename = f"document_{job_id[-6:]}.pdf"
        #
        # return StreamingResponse(
        #     BytesIO(pdf_content),
        #     media_type='application/pdf',
        #     headers={"Content-Disposition": f"attachment; filename={filename}"}
        # )

        # Option 2: Use FileResponse for more efficient file serving
        pdf_path = get_pdf_path(job_id)
        if not os.path.exists(pdf_path):
            raise HTTPException(
                status_code=404,
                detail="PDF file not found in storage"
            )

        filename = f"document_{job_id[-6:]}.pdf"

        return FileResponse(
            pdf_path,
            media_type='application/pdf',
            filename=filename
        )
    except Exception as e:
        logger.error(f"Error delivering PDF for job {job_id}: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail="Error retrieving PDF file"
        )

@app.get("/health", summary="Health check endpoint")
async def health_check():
    """Simple health check endpoint to verify the API is running."""
    try:
        # Check database connection
        with sqlite3.connect(DB_PATH) as conn:
            cursor = conn.execute("SELECT 1")
            cursor.fetchone()
        db_status = "connected"
    except Exception as e:
        db_status = f"error: {str(e)}"

    return {
        "status": "healthy",
        "version": VERSION,
        "database": db_status,
        "storage": os.path.exists(JOBS_DIR) and os.access(JOBS_DIR, os.W_OK)
    }

@app.on_event("startup")
async def startup_event():
    logger.info("Service starting up")
    # Initialize the database
    init_db()
    # Start background cleanup task
    asyncio.create_task(cleanup_old_jobs())

@app.on_event("shutdown")
async def shutdown_event():
    """Clean up on shutdown"""
    logger.info("Service shutting down")
    executor.shutdown(wait=False)

requirements.txt: 5 changed lines (Normal file)
@@ -0,0 +1,5 @@
fastapi==0.110.0
uvicorn==0.27.1
pydantic==2.5.3
python-multipart==0.0.6
aiofiles==23.2.1