Deploy Real-Time Endpoint
Create the inference code
from fastapi import FastAPI, File, UploadFile
import tensorflow as tf
import numpy
from PIL import Image
from io import BytesIO
app = FastAPI()
model_path = "model.h5"
loaded_model = None
@app.post("{full_path:path}")
async def predict(image: UploadFile = File(...)):
img = Image.open(BytesIO(await image.read()))
# Preprocess for MNIST
img = img.resize((28, 28)).convert("L")
img_array = numpy.array(img)
image_data = numpy.reshape(img_array, (1, 28, 28))
# Load model once, reuse for subsequent requests
global loaded_model
if not loaded_model:
loaded_model = tf.keras.models.load_model(model_path)
# Run prediction
prediction = loaded_model.predict_classes(image_data)
return f"Predicted_Digit: {prediction[0]}"Test locally
Define the endpoint
Install additional packages
Push to Git
Create the deployment
Test your endpoint
Last updated
Was this helpful?
