TinyML on ESP32: Building an Anomaly Detection Model from Scratch
Running a trained anomaly detection model on an ESP32 with no cloud. Here's the full pipeline — data collection, training, quantization, and inference.
The pitch for Edge AI is simple: instead of sending sensor data to the cloud for analysis, run the model on the device. No latency. No bandwidth cost. Works offline. But most Edge AI tutorials stop at "flash the model" without showing the full pipeline. This is the full pipeline.
What We're Building
A vibration anomaly detector for rotating machinery. The ESP32 reads an MPU6050 accelerometer at 200Hz, runs a trained autoencoder model, and flags anomalies locally — no WiFi required for inference.
Hardware needed:
- ESP32-S3 DevKit
- MPU6050 accelerometer/gyroscope
- Any industrial fan or motor for realistic data collection
Step 1: Data Collection Firmware
Before training, you need labelled data. Collect two classes: normal and anomaly (bearing failure, imbalance, loose mount).
1#include "driver/i2c.h"
2#include "mpu6050.h"
3#include "esp_log.h"
4
5#define SAMPLE_RATE_HZ 200
6#define WINDOW_SIZE 128 // 128 samples = 640ms at 200Hz
7#define FEATURE_COUNT 6 // ax, ay, az, gx, gy, gz
8
9static float window[WINDOW_SIZE][FEATURE_COUNT];
10static int window_index = 0;
11
12void collect_sample(void) {
13 mpu6050_data_t data;
14 mpu6050_read(&data);
15
16 window[window_index][0] = data.accel_x;
17 window[window_index][1] = data.accel_y;
18 window[window_index][2] = data.accel_z;
19 window[window_index][3] = data.gyro_x;
20 window[window_index][4] = data.gyro_y;
21 window[window_index][5] = data.gyro_z;
22
23 window_index = (window_index + 1) % WINDOW_SIZE;
24
25 if (window_index == 0) {
26 // Window full — log to SD card or UART for collection
27 log_window_to_uart(window);
28 }
29}Collect ~10 minutes of normal operation and ~5 minutes of each anomaly class. Label the CSV output before training.
Step 2: Feature Extraction and Model Training
Raw time-series is too large for ESP32. Extract statistical features from each window:
1import numpy as np
2import pandas as pd
3
def extract_features(window):
    """Summarise one 128x6 sensor window as 18 statistical features.

    Args:
        window: 2-D array indexed as [sample, axis]; the first six columns
            are read.

    Returns:
        1-D array of shape (18,). For each of the six columns, in order,
        it holds the mean, the standard deviation and the peak-to-peak
        range (max - min).
    """
    per_axis_stats = []
    for axis in range(6):
        samples = window[:, axis]
        peak_to_peak = np.max(samples) - np.min(samples)
        per_axis_stats.append((np.mean(samples), np.std(samples), peak_to_peak))
    # (6, 3) -> (18,), keeping the mean/std/p2p triple of each axis adjacent
    return np.array(per_axis_stats).reshape(-1)
15
16# Load collected CSV
17df = pd.read_csv("vibration_data.csv")
18windows = df.values.reshape(-1, 128, 6)
19X = np.array([extract_features(w) for w in windows])Train a simple autoencoder — it learns to reconstruct normal patterns and fails on anomalies:
1import tensorflow as tf
2
def build_autoencoder(input_dim=18, latent_dim=8):
    """Build a small dense autoencoder.

    Layout: input -> Dense(12, relu) -> Dense(latent_dim, relu)
    -> Dense(12, relu) -> Dense(input_dim) with no activation.

    Args:
        input_dim: Length of the feature vector fed to (and reconstructed
            by) the model.
        latent_dim: Width of the bottleneck layer.

    Returns:
        An uncompiled tf.keras.Model mapping the input to its reconstruction.
    """
    layers = tf.keras.layers
    inputs = tf.keras.Input(shape=(input_dim,))

    hidden = inputs
    for width in (12, latent_dim, 12):
        hidden = layers.Dense(width, activation='relu')(hidden)

    reconstruction = layers.Dense(input_dim)(hidden)
    return tf.keras.Model(inputs, reconstruction)
10
11model = build_autoencoder()
12model.compile(optimizer='adam', loss='mse')
13
14# Train ONLY on normal data — anomaly = high reconstruction error
15X_normal = X[labels == 0]
16model.fit(X_normal, X_normal, epochs=50, batch_size=32, validation_split=0.1)
Step 3: Quantize to int8 TFLite
1# Convert to TFLite with full int8 quantization
def representative_dataset():
    """Yield calibration inputs for the TFLite converter.

    Iterates over the first 200 rows of X_normal (fewer if it is shorter)
    and yields each one as a single-element list holding a float32 array
    reshaped to (1, n_features).
    """
    calibration_rows = X_normal[:200]
    yield from (
        [row.astype(np.float32).reshape(1, -1)]
        for row in calibration_rows
    )
5
6converter = tf.lite.TFLiteConverter.from_keras_model(model)
7converter.optimizations = [tf.lite.Optimize.DEFAULT]
8converter.representative_dataset = representative_dataset
9converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
10converter.inference_input_type = tf.int8
11converter.inference_output_type = tf.int8
12
13tflite_model = converter.convert()
14
15with open("anomaly_detector_int8.tflite", "wb") as f:
16 f.write(tflite_model)
17
18print(f"Model size: {len(tflite_model) / 1024:.1f} KB")
19# Expect: ~4-8 KB for this architecture
Convert to a C byte array for embedding in firmware:
1xxd -i anomaly_detector_int8.tflite > model_data.h
Step 4: On-Device Inference with TFLite Micro
1#include "tensorflow/lite/micro/micro_interpreter.h"
2#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
3#include "model_data.h"
4
5// Tensor arena — tune this to your model size
6constexpr int kTensorArenaSize = 32 * 1024;
7static uint8_t tensor_arena[kTensorArenaSize];
8
9static tflite::MicroInterpreter* interpreter = nullptr;
10
11void inference_init(void) {
12 static tflite::MicroMutableOpResolver<4> resolver;
13 resolver.AddFullyConnected();
14 resolver.AddRelu();
15 resolver.AddQuantize();
16 resolver.AddDequantize();
17
18 static const tflite::Model* model =
19 tflite::GetModel(anomaly_detector_int8_tflite);
20
21 static tflite::MicroInterpreter static_interpreter(
22 model, resolver, tensor_arena, kTensorArenaSize);
23
24 interpreter = &static_interpreter;
25 interpreter->AllocateTensors();
26}
27
28float run_inference(float* features, int feature_count) {
29 TfLiteTensor* input = interpreter->input(0);
30
31 // Quantize float features to int8
32 float scale = input->params.scale;
33 int32_t zero_pt = input->params.zero_point;
34 for (int i = 0; i < feature_count; i++) {
35 input->data.int8[i] = (int8_t)(features[i] / scale + zero_pt);
36 }
37
38 interpreter->Invoke();
39
40 // Dequantize output
41 TfLiteTensor* output = interpreter->output(0);
42 float scale_out = output->params.scale;
43 int32_t zero_pt_out = output->params.zero_point;
44
45 // Compute reconstruction MSE
46 float mse = 0.0f;
47 for (int i = 0; i < feature_count; i++) {
48 float reconstructed = (output->data.int8[i] - zero_pt_out) * scale_out;
49 float diff = features[i] - reconstructed;
50 mse += diff * diff;
51 }
52 return mse / feature_count;
53}Step 5: Anomaly Threshold and Alerting
1#define ANOMALY_THRESHOLD 0.025f // tuned on validation set
2#define ALERT_GPIO GPIO_NUM_2
3
4void anomaly_detection_task(void *pvParameters) {
5 inference_init();
6
7 while (1) {
8 // Wait for full window of sensor data
9 float features[18];
10 collect_and_extract_features(features);
11
12 float reconstruction_error = run_inference(features, 18);
13
14 if (reconstruction_error > ANOMALY_THRESHOLD) {
15 ESP_LOGW("ANOMALY", "Detected! Error=%.4f (threshold=%.4f)",
16 reconstruction_error, ANOMALY_THRESHOLD);
17 gpio_set_level(ALERT_GPIO, 1); // trigger LED / relay
18 } else {
19 gpio_set_level(ALERT_GPIO, 0);
20 }
21
22 vTaskDelay(pdMS_TO_TICKS(640)); // 128 samples @ 200Hz = 640ms window
23 }
24}Key Takeaways
- ESP32-S3 is the right target — vector extensions make neural network ops 2–4x faster than base ESP32
- Feature extraction beats raw time-series — 18 statistical features are more informative and 42x smaller than the raw 128×6 window
- Autoencoders excel at anomaly detection — train only on normal data, anomalies have high reconstruction error
- int8 quantization is a free lunch — 4x smaller, 2–3x faster, under 2% accuracy loss for simple architectures
- Set your threshold on real data — use the 95th or 99th percentile of normal reconstruction errors from a held-out validation set
Go from Arduino to Production Firmware
The ESP32-IDF Workshop covers ESP-IDF from scratch — tasks, queues, OTA, Wi-Fi management, and deploying firmware that doesn't break at 3am.
Frequently Asked Questions
Quick answers to common questions

I build things that run on chips and the software that talks to them. ESP32, STM32, FreeRTOS, FastAPI, TinyML — from bare-metal firmware to cloud backends to on-device inference. Based in Bengaluru. Founder of Analog Data.