# A minimal Docker Compose setup that runs a Jupyter Notebook server and a
# Spark server with the Spark UI exposed. It lets you experiment with new
# ideas and test PySpark locally, without needing expensive infrastructure.
# Compose file format version.
version: '3'

services:
  # Notebook front end: serves Jupyter on host port 8888.
  pyspark-jupyter:
    image: jupyter/pyspark-notebook
    ports:
      - "8888:8888"
    environment:
      - PYSPARK_DRIVER_PYTHON=jupyter
      - PYSPARK_DRIVER_PYTHON_OPTS=notebook
      # Notebook login token. Set JUPYTER_TOKEN in your shell or in a .env
      # file to override it; the fallback below is a placeholder and should
      # be changed to a secure token.
      - JUPYTER_TOKEN=${JUPYTER_TOKEN:-mysecrettoken}
    volumes:
      # Bind-mount the local ./notebooks directory into the container so
      # notebooks are kept on the host.
      - ./notebooks:/home/jovyan/work
    depends_on:
      # Controls start order only; it does not wait for Spark to be ready.
      - spark-master

  spark-master:
    image: bitnami/spark:latest
    ports:
      # NOTE(review): 4040 is the default port of a Spark *application*
      # (driver) UI; the standalone master web UI defaults to 8080. Confirm
      # that this mapping targets the intended UI/container.
      - "4040:4040"  # Spark UI