# Base name of the binary; the build rule writes it to bin/$(EXECUTABLE).
EXECUTABLE := go-whisper-api
# Go toolchain command; override with e.g. `make GO=/path/to/go`.
GO ?= go
# Every .go file below the current directory (prerequisites of the binary and `install`).
GOFILES := $(shell find . -name "*.go" -type f)
# "GO" when the shell can resolve $(GO), "NOGO" otherwise.
# Simply expanded (:=) so the probe runs once at parse time rather than on
# every reference to the variable.
HAS_GO := $(shell hash $(GO) > /dev/null 2>&1 && echo "GO" || echo "NOGO" )

# --- whisper.cpp checkout (all paths absolute) ---
# Root of the third-party source tree.
WHISPER_CPP := $(abspath third_party/whisper.cpp)
# Build tree handed to cmake via -B.
WHISPER_BUILD := $(WHISPER_CPP)/build
# bindings/go subdirectory of the checkout.
WHISPER_VENDOR := $(WHISPER_CPP)/bindings/go
# Colon-separated library directories inside the build tree.
WHISPER_LIBDIR := $(WHISPER_BUILD)/src:$(WHISPER_BUILD)/ggml/src

# --- runtime library bundle ---
# Absolute path of ./lib, where the install-* targets copy shared objects.
RUNTIME_LIB_DIR := $(abspath lib)
# rpath entries, relative to the binary's own location:
#   $ORIGIN/lib    - binary sits next to lib/ (e.g. ./go-whisper-api + ./lib/)
#   $ORIGIN/../lib - binary sits in bin/      (e.g. bin/go-whisper-api + lib/)
# `$$` keeps a literal `$ORIGIN` in the value.
RUNTIME_RPATH := -Wl,-rpath,$$ORIGIN/lib:$$ORIGIN/../lib

# External-linker flags spliced into `go build -ldflags`.
# Darwin gets none; every other platform embeds $(RUNTIME_RPATH).
ifeq ($(shell uname), Darwin)
EXTLDFLAGS =
else
EXTLDFLAGS = -extldflags "$(RUNTIME_RPATH)"
endif

# Settings that need a working Go toolchain (skipped when HAS_GO is not "GO").
ifeq ($(HAS_GO),GO)
# cgo C flags: the toolchain's own CGO_CFLAGS plus one extra define.
# Note: this block does not export CGO_CFLAGS.
CGO_EXTRA_CFLAGS := -DSQLITE_MAX_VARIABLE_NUMBER=32766
CGO_CFLAGS ?= $(shell $(GO) env CGO_CFLAGS) $(CGO_EXTRA_CFLAGS)

# Put $(GOPATH)/bin at the front of PATH for every recipe.
GOPATH ?= $(shell $(GO) env GOPATH)
export PATH := $(GOPATH)/bin:$(PATH)
endif

# Platform-specific build settings.
# On Windows (OS is "Windows_NT" or "Windows") the binary gets an .exe suffix.
# The suffix is appended with `:=`: EXECUTABLE is already assigned at the top
# of this file, so the previous `EXECUTABLE ?= $(EXECUTABLE).exe` never took
# effect (and would have been self-referential if it had).
ifneq ($(filter Windows_NT Windows,$(OS)),)
GOFLAGS := -v -buildmode=exe
EXECUTABLE := $(EXECUTABLE).exe
else
GOFLAGS := -v
endif

# Version string stamped into the binary (see GOLDFLAGS) and printed by `make version`.
# Without DRONE_TAG it is derived from git; otherwise DRONE_TAG is used as-is.
# A VERSION supplied by the caller always wins (?=).
ifeq ($(DRONE_TAG),)
VERSION ?= $(shell git describe --tags --always || git rev-parse --short HEAD)
else
VERSION ?= $(DRONE_TAG)
endif

# Go build tags; the build rule looks for "xlm" / "sherpa" in this value.
TAGS ?=
# Machine architecture as reported by `uname -m`.
UNAME_M := $(shell uname -m)
# Library sub-directory name inside the sherpa-onnx-go-linux module.
# Left undefined on architectures other than x86_64 / aarch64.
ifeq ($(UNAME_M),x86_64)
SHERPA_LIBARCH := x86_64-unknown-linux-gnu
endif
ifeq ($(UNAME_M),aarch64)
SHERPA_LIBARCH := aarch64-unknown-linux-gnu
endif
# Second field of the first go.mod line mentioning sherpa-onnx-go-linux.
SHERPA_LINUX_VER := $(shell awk '/sherpa-onnx-go-linux/ {print $$2; exit}' go.mod)
# Module cache root. Ask the toolchain (GOMODCACHE), as ORT_LIB_SRC does, because
# the cache is not guaranteed to live at $(GOPATH)/pkg/mod; fall back to that
# path when the query yields nothing.
sherpa_modcache := $(shell $(GO) env GOMODCACHE 2>/dev/null)
ifeq ($(strip $(sherpa_modcache)),)
sherpa_modcache := $(GOPATH)/pkg/mod
endif
# Directory prepended to LIBRARY_PATH for xlm/sherpa builds.
SHERPA_LIBDIR := $(sherpa_modcache)/github.com/k2-fsa/sherpa-onnx-go-linux@$(SHERPA_LINUX_VER)/lib/$(SHERPA_LIBARCH)
# External-linker flags for the sherpa and xlm build variants.
# Empty on Darwin; elsewhere both carry $(RUNTIME_RPATH).
ifeq ($(shell uname), Darwin)
EXTLDFLAGS_SHERPA =
EXTLDFLAGS_XLM =
else
EXTLDFLAGS_SHERPA = -extldflags "$(RUNTIME_RPATH)"
EXTLDFLAGS_XLM = -extldflags "$(RUNTIME_RPATH)"
endif
# Go linker flags: sets main.Version to $(VERSION).
GOLDFLAGS ?= -X 'main.Version=$(VERSION)'

# Header search path: whisper.cpp and ggml include dirs plus the bindings/go
# directory, followed by any pre-existing INCLUDE_PATH. The build, test and
# install recipes pass it to the compiler as C_INCLUDE_PATH.
INCLUDE_PATH := $(WHISPER_CPP)/include:$(WHISPER_CPP)/ggml/include:$(WHISPER_VENDOR):$(INCLUDE_PATH)
# Link-time library search path: whisper build dirs ahead of any existing value.
# Recipes pass it explicitly as LIBRARY_PATH=...
LIBRARY_PATH := $(WHISPER_LIBDIR):$(LIBRARY_PATH)
# Same directories prepended to LD_LIBRARY_PATH and exported to every recipe.
export LD_LIBRARY_PATH := $(WHISPER_LIBDIR):$(LD_LIBRARY_PATH)

# Optional CUDA/cuBLAS build, enabled by giving WHISPER_CUBLAS a non-empty value.
#
# The cgo flags are exported so `go build` in the regular $(EXECUTABLE) rule
# picks them up from the environment. This block used to carry its own
# `build:` / `$(EXECUTABLE):` rules, which were removed because:
#   - the $(EXECUTABLE) recipe was overridden by the later definition of the
#     same target (make warned "overriding recipe"), so the CUDA cgo flags
#     never reached the compiler;
#   - the unquoted CGO_CFLAGS=... prefix would have been split by the shell
#     at the first space;
#   - the extra `build: $(EXECUTABLE)` line placed the link step ahead of
#     install-runtime-libs in build's prerequisite list.
# EXTLDFLAGS replaces the default rpath flags with the CUDA link line.
ifdef WHISPER_CUBLAS
cuda_cppflags := -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
CGO_CFLAGS    := $(CGO_CFLAGS) $(cuda_cppflags)
CGO_CXXFLAGS  := $(CGO_CXXFLAGS) $(cuda_cppflags)
export CGO_CFLAGS CGO_CXXFLAGS
EXTLDFLAGS = -extldflags "-lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib"
endif

# Whisper model: local destination and download source (used by `download-model`).
MODEL_PATH     ?= models/ggml-tiny.en.bin
MODEL_URL      ?= https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin
# VAD model: expected local file and the model name given to the download script.
VAD_MODEL_PATH ?= models/ggml-silero-v6.2.0.bin
VAD_MODEL      ?= silero-v6.2.0

# Aggregate target: builds the binary. Declared phony so a file or directory
# named `all` cannot make it look up to date.
.PHONY: all
all: build

# CT-transformer punctuation model: install directory and archive URL.
PUNCT_MODEL_DIR ?= models/punctuation/ct-transformer-zh-en-int8
PUNCT_MODEL_URL ?= https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12-int8.tar.bz2

# XLM-RoBERTa punctuation model: install directory, Hugging Face repository,
# and the bundled config that is copied in as config.yaml.
XLM_PUNCT_DIR        ?= models/punctuation/xlm-roberta
XLM_HF_REPO          ?= Salama1429/xlm-roberta_punctuation_fullstop_truecase
XLM_MODEL_CONFIG_SRC ?= config/xlm-roberta-model.yaml

# libonnxruntime.so inside the sherpa-onnx-go-linux module in the Go module cache.
ORT_LIB_SRC ?= $(shell $(GO) env GOMODCACHE 2>/dev/null)/github.com/k2-fsa/sherpa-onnx-go-linux@$(SHERPA_LINUX_VER)/lib/$(SHERPA_LIBARCH)/libonnxruntime.so

# Copy runtime .so into ./lib/. Binary rpath: $ORIGIN/lib or $ORIGIN/../lib (see RUNTIME_RPATH).
# Use cp -n where possible: existing root-owned libs in lib/ must not break the build.
# Copy errors are deliberately ignored (|| true); verify-runtime-libs-xlm is the strict check.
# Phony: this is a command, not a file to be produced.
.PHONY: install-runtime-libs
install-runtime-libs: dependency
	@mkdir -p "$(RUNTIME_LIB_DIR)"
	@cp -an "$(WHISPER_BUILD)/src"/libwhisper.so* "$(RUNTIME_LIB_DIR)/" 2>/dev/null || true
	@cp -an "$(WHISPER_BUILD)/ggml/src"/libggml*.so* "$(RUNTIME_LIB_DIR)/" 2>/dev/null || true
	@echo "Whisper/ggml libs ready in $(RUNTIME_LIB_DIR)/"

# Install libonnxruntime.so from the Go module cache into $(RUNTIME_LIB_DIR).
# Outcomes, in order:
#   1. source missing                      -> error, hint to download modules
#   2. destination already byte-identical  -> nothing to do
#   3. copy succeeds                       -> installed
#   4. copy fails but content is identical -> accepted (file not writable)
#   5. otherwise                           -> error with a chown hint
# Phony: this is a command, not a file to be produced.
.PHONY: install-ort-lib
install-ort-lib:
	@mkdir -p "$(RUNTIME_LIB_DIR)"
	@if [ ! -f "$(ORT_LIB_SRC)" ]; then echo "missing $(ORT_LIB_SRC); run: go mod download"; exit 1; fi
	@dest="$(RUNTIME_LIB_DIR)/libonnxruntime.so"; \
	if [ -f "$$dest" ] && cmp -s "$(ORT_LIB_SRC)" "$$dest"; then \
		echo "libonnxruntime.so already up to date in $(RUNTIME_LIB_DIR)/"; \
	elif cp -f "$(ORT_LIB_SRC)" "$$dest" 2>/dev/null; then \
		echo "Installed $$dest"; \
	elif [ -f "$$dest" ] && cmp -s "$(ORT_LIB_SRC)" "$$dest"; then \
		echo "libonnxruntime.so present in $(RUNTIME_LIB_DIR)/ (unchanged, not writable)"; \
	else \
		echo "cannot install libonnxruntime.so to $$dest"; \
		echo "fix: sudo chown -R $$USER:$$(id -gn) $(RUNTIME_LIB_DIR)"; \
		exit 1; \
	fi

# XLM punctuation links -lsentencepiece; bundle .so for hosts without libsentencepiece0 package.
# Candidate system directories, searched in order; the first one containing
# libsentencepiece.so.0 (file or symlink) is used.
SP_LIB_DIRS := /usr/lib/x86_64-linux-gnu /usr/lib/aarch64-linux-gnu /usr/lib64 /usr/lib
# Phony: this is a command, not a file to be produced.
.PHONY: install-sp-lib
install-sp-lib:
	@mkdir -p "$(RUNTIME_LIB_DIR)"
	@found=0; \
	for d in $(SP_LIB_DIRS); do \
		if [ -e "$$d/libsentencepiece.so.0" ] || [ -L "$$d/libsentencepiece.so.0" ]; then \
			cp -an "$$d"/libsentencepiece.so* "$(RUNTIME_LIB_DIR)/" 2>/dev/null || true; \
			found=1; \
			break; \
		fi; \
	done; \
	if [ "$$found" = "0" ]; then \
		echo "libsentencepiece.so.0 not found; install: sudo apt-get install libsentencepiece0"; \
		exit 1; \
	fi
	@test -e "$(RUNTIME_LIB_DIR)/libsentencepiece.so.0" || (echo "missing $(RUNTIME_LIB_DIR)/libsentencepiece.so.0 after install-sp-lib"; exit 1)
	@echo "Sentencepiece libs ready in $(RUNTIME_LIB_DIR)/"

# If lib/*.so were created as root (e.g. manual cp with sudo), reclaim ownership for builds.
# Tries a plain chown first and falls back to sudo. The owner comes from `id`
# rather than $USER, which can be unset. The success message is printed only
# when one of the chown attempts succeeded; otherwise the target fails.
.PHONY: fix-lib-perms
fix-lib-perms:
	@if [ -d "$(RUNTIME_LIB_DIR)" ]; then \
		owner="$$(id -un):$$(id -gn)"; \
		{ chown -R "$$owner" "$(RUNTIME_LIB_DIR)" 2>/dev/null || \
			sudo chown -R "$$owner" "$(RUNTIME_LIB_DIR)"; } && \
		echo "Ownership of $(RUNTIME_LIB_DIR)/ updated"; \
	fi

# Aggregate: every runtime library needed by the xlm build
# (whisper/ggml, onnxruntime, sentencepiece). Phony: no file is produced.
.PHONY: install-runtime-libs-xlm
install-runtime-libs-xlm: install-runtime-libs install-ort-lib install-sp-lib

# Fail fast before deploy if ./lib is incomplete (lib/ is not in git: *.so is gitignored).
# Checks the binary and each required shared object; the first missing item
# aborts with a hint naming the target that provides it.
# Phony: this is a check, not a file to be produced.
.PHONY: verify-runtime-libs-xlm
verify-runtime-libs-xlm:
	@test -f bin/$(EXECUTABLE) || (echo "missing bin/$(EXECUTABLE); run: make build-xlm"; exit 1)
	@test -f "$(RUNTIME_LIB_DIR)/libonnxruntime.so" || (echo "missing $(RUNTIME_LIB_DIR)/libonnxruntime.so; run: make install-runtime-libs-xlm"; exit 1)
	@test -e "$(RUNTIME_LIB_DIR)/libsentencepiece.so.0" || (echo "missing $(RUNTIME_LIB_DIR)/libsentencepiece.so.0; run: make install-sp-lib"; exit 1)
	@test -e "$(RUNTIME_LIB_DIR)/libwhisper.so.1" || (echo "missing $(RUNTIME_LIB_DIR)/libwhisper.so.1; run: make install-runtime-libs"; exit 1)
	@echo "Runtime libs OK in $(RUNTIME_LIB_DIR)/"

# Tarball holding bin/$(EXECUTABLE) and lib/, named after the machine architecture.
# Reuses UNAME_M (defined earlier from `uname -m`) instead of forking uname again.
RUNTIME_TARBALL := dist/go-whisper-api-runtime-$(UNAME_M).tar.gz
# Package the verified binary and runtime libraries. Phony: always re-packages.
.PHONY: package-runtime-xlm
package-runtime-xlm: verify-runtime-libs-xlm
	@mkdir -p dist
	tar -czf "$(RUNTIME_TARBALL)" bin/$(EXECUTABLE) lib
	@echo "Created $(RUNTIME_TARBALL) — on prod: tar -xzf ... -C /opt/go-whisper-api (keeps bin/ and lib/)"

# Copy bundled label config (needs write access to $(XLM_PUNCT_DIR); fix with: sudo chown -R $$USER models/punctuation)
# The directory is quoted so a path containing spaces is handled like in the cp below.
# Phony: the copy is repeated on every invocation.
.PHONY: install-xlm-punctuation-config
install-xlm-punctuation-config:
	@mkdir -p "$(XLM_PUNCT_DIR)"
	@cp "$(XLM_MODEL_CONFIG_SRC)" "$(XLM_PUNCT_DIR)/config.yaml"
	@echo "Installed $(XLM_PUNCT_DIR)/config.yaml"

# Fetch model.onnx and sp.model for the XLM punctuation model from Hugging Face.
# Files already present are skipped. Each download goes to "<name>.part" and is
# renamed only on success, so an interrupted transfer is never mistaken for a
# complete file on the next run. Any failure aborts the target immediately
# (previously a failed first download was hidden when the second succeeded).
.PHONY: download-xlm-punctuation-model
download-xlm-punctuation-model: install-xlm-punctuation-config
	@mkdir -p "$(XLM_PUNCT_DIR)"
	@for f in model.onnx sp.model; do \
		dest="$(XLM_PUNCT_DIR)/$$f"; \
		if [ -f "$$dest" ]; then \
			echo "Already have $$dest"; \
		else \
			echo "Downloading $$f from $(XLM_HF_REPO)..."; \
			curl -fL "https://huggingface.co/$(XLM_HF_REPO)/resolve/main/$$f" -o "$$dest.part" && \
			mv -f "$$dest.part" "$$dest" || { rm -f "$$dest.part"; exit 1; }; \
		fi; \
	done

# Speaker diarization models: segmentation archive, embedding model, target directory.
DIAR_SEG_URL ?= https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
DIAR_EMB_URL ?= https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
DIAR_DIR ?= models/diarization

# Download the segmentation archive (extracted into $(DIAR_DIR)) and the
# embedding model, skipping whatever is already present.
# A failed curl or tar now fails the target; before, the trailing `rm -f`
# made the step report success. Downloads use ".part" files inside
# $(DIAR_DIR) instead of a fixed /tmp name, and the embedding model is renamed
# into place only after a complete transfer.
# NOTE(review): the presence check looks for pyannote-segmentation-3-0/ while the
# archive is named sherpa-onnx-pyannote-segmentation-3-0 — confirm the extracted
# directory name, otherwise the archive is fetched on every run.
.PHONY: download-diarization-models
download-diarization-models:
	@mkdir -p "$(DIAR_DIR)"
	@if [ ! -f "$(DIAR_DIR)/pyannote-segmentation-3-0/model.onnx" ]; then \
		echo "Downloading speaker segmentation model..."; \
		archive="$(DIAR_DIR)/.diar-seg.tar.bz2.part"; \
		if curl -fL "$(DIAR_SEG_URL)" -o "$$archive" && tar -xjf "$$archive" -C "$(DIAR_DIR)"; then \
			rm -f "$$archive"; \
		else \
			rm -f "$$archive"; \
			exit 1; \
		fi; \
	else \
		echo "Segmentation model present"; \
	fi
	@emb="$(DIAR_DIR)/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"; \
	if [ ! -f "$$emb" ]; then \
		echo "Downloading speaker embedding model..."; \
		curl -fL "$(DIAR_EMB_URL)" -o "$$emb.part" && \
		mv -f "$$emb.part" "$$emb" || { rm -f "$$emb.part"; exit 1; }; \
	else \
		echo "Embedding model present"; \
	fi

# Download and unpack the CT-transformer punctuation model unless
# $(PUNCT_MODEL_DIR)/model.int8.onnx already exists. The archive is extracted
# under models/punctuation and, if the expected directory appears, renamed to
# $(PUNCT_MODEL_DIR).
# A failed curl or tar now fails the target (the old `;` chain ended in
# `rm -f`, which hid the error). The archive is staged as a ".part" file next
# to the models instead of under a fixed /tmp name.
.PHONY: download-punctuation-model
download-punctuation-model:
	@mkdir -p models/punctuation
	@if [ ! -f "$(PUNCT_MODEL_DIR)/model.int8.onnx" ]; then \
		echo "Downloading punctuation model..."; \
		archive="models/punctuation/.punct-model.tar.bz2.part"; \
		extracted="models/punctuation/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12-int8"; \
		if curl -fL "$(PUNCT_MODEL_URL)" -o "$$archive" && tar -xjf "$$archive" -C models/punctuation; then \
			rm -f "$$archive"; \
		else \
			rm -f "$$archive"; \
			exit 1; \
		fi; \
		if [ -d "$$extracted" ]; then \
			mv "$$extracted" "$(PUNCT_MODEL_DIR)" || exit 1; \
		fi; \
	else \
		echo "Punctuation model already exists: $(PUNCT_MODEL_DIR)/model.int8.onnx"; \
	fi

# Download the whisper model to $(MODEL_PATH) unless it is already there.
# The transfer is written to "$(MODEL_PATH).part" and renamed on success, so a
# broken download is not later reported as "Model already exists".
# The parent directory of a custom MODEL_PATH is created as well.
.PHONY: download-model
download-model:
	@mkdir -p models "$$(dirname "$(MODEL_PATH)")"
	@if [ ! -f "$(MODEL_PATH)" ]; then \
		echo "Downloading $(MODEL_PATH)..."; \
		curl -fL "$(MODEL_URL)" -o "$(MODEL_PATH).part" && \
		mv -f "$(MODEL_PATH).part" "$(MODEL_PATH)" || { rm -f "$(MODEL_PATH).part"; exit 1; }; \
	else \
		echo "Model already exists: $(MODEL_PATH)"; \
	fi

# Fetch the VAD model into models/ via the helper script inside the whisper.cpp
# checkout, unless $(VAD_MODEL_PATH) already exists.
# Depends on `clone` because the script lives in third_party/whisper.cpp.
.PHONY: download-vad-model
download-vad-model: clone
	@mkdir -p models
	@if [ ! -f "$(VAD_MODEL_PATH)" ]; then \
		echo "Downloading VAD model $(VAD_MODEL) to models/..."; \
		./third_party/whisper.cpp/models/download-vad-model.sh $(VAD_MODEL) models; \
	else \
		echo "VAD model already exists: $(VAD_MODEL_PATH)"; \
	fi

# Clone whisper.cpp into third_party/ unless the directory already exists.
# Phony: the directory check is done in the recipe, not by make.
.PHONY: clone
clone:
	@[ -d third_party/whisper.cpp ] || git clone https://github.com/appleboy/whisper.cpp.git third_party/whisper.cpp

# Configure and build whisper.cpp with CMake (Release) unless libwhisper.so or
# libwhisper.a is already present in the build tree. The job count comes from
# `nproc`, falling back to 4 when it is unavailable.
# Phony: the up-to-date check is done in the recipe.
.PHONY: dependency
dependency: clone
	@echo Build whisper
	@if [ ! -f "$(WHISPER_BUILD)/src/libwhisper.so" ] && [ ! -f "$(WHISPER_BUILD)/src/libwhisper.a" ]; then \
		cmake -S "$(WHISPER_CPP)" -B "$(WHISPER_BUILD)" -DCMAKE_BUILD_TYPE=Release && \
		cmake --build "$(WHISPER_BUILD)" --config Release -j$$(nproc 2>/dev/null || echo 4); \
	else \
		echo "whisper library already built in $(WHISPER_BUILD)"; \
	fi

# Run the Go test suite with coverage written to coverage.txt.
# The success banner uses printf: whether `echo` interprets "\n" and "\033"
# depends on the shell, printf always does. Search paths are quoted so
# directories containing spaces survive.
# Phony: a file or directory named `test` must not suppress the run.
.PHONY: test
test:
	@C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" $(GO) test -v -cover -coverprofile coverage.txt ./... && printf '\n==>\033[32m Ok\033[m\n\n' || exit 1

# `go install` the module with the same tags and linker flags as the build.
# A space now separates $(EXTLDFLAGS) from -s so the flags no longer depend on
# the closing quote of -extldflags "..." to be split correctly. Search paths
# are quoted for directories containing spaces.
# Phony: a file named `install` must not suppress the run.
.PHONY: install
install: $(GOFILES)
	C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" $(GO) install -v -tags '$(TAGS)' -ldflags '$(EXTLDFLAGS) -s -w $(GOLDFLAGS)'

# Default build: stage the whisper/ggml runtime libraries, then link the binary.
# Phony: a file or directory named `build` must not suppress the run.
.PHONY: build
build: install-runtime-libs $(EXECUTABLE)

# Build with sherpa-onnx (punctuation + speaker diarization)
# Re-invokes make so TAGS=sherpa is visible while the Makefile is parsed.
.PHONY: build-sherpa
build-sherpa:
	@$(MAKE) build TAGS=sherpa

# XLM-RoBERTa punctuation (47 languages); requires libsentencepiece-dev
# Stages all runtime libraries first, then re-invokes make with TAGS=xlm so the
# tag is visible while the Makefile is parsed.
.PHONY: build-xlm
build-xlm:
	@$(MAKE) install-runtime-libs-xlm
	@$(MAKE) build TAGS=xlm

# Link the binary into bin/$(EXECUTABLE). The recipe is selected at parse time
# from TAGS:
#   contains "xlm"    -> sherpa library dir on LIBRARY_PATH, EXTLDFLAGS_XLM
#   contains "sherpa" -> sherpa library dir on LIBRARY_PATH, EXTLDFLAGS_SHERPA
#   otherwise         -> whisper-only build with EXTLDFLAGS
# The default branch now has a space between $(EXTLDFLAGS) and -s, matching the
# other two branches. Search paths are quoted for directories containing spaces.
$(EXECUTABLE): $(GOFILES)
ifneq (,$(findstring xlm,$(TAGS)))
	C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(SHERPA_LIBDIR):$(LIBRARY_PATH)" $(GO) build -v -tags '$(TAGS)' -ldflags '$(EXTLDFLAGS_XLM) -s -w $(GOLDFLAGS)' -o bin/$@
else ifneq (,$(findstring sherpa,$(TAGS)))
	C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(SHERPA_LIBDIR):$(LIBRARY_PATH)" $(GO) build -v -tags '$(TAGS)' -ldflags '$(EXTLDFLAGS_SHERPA) -s -w $(GOLDFLAGS)' -o bin/$@
else
	C_INCLUDE_PATH="$(INCLUDE_PATH)" LIBRARY_PATH="$(LIBRARY_PATH)" $(GO) build -v -tags '$(TAGS)' -ldflags '$(EXTLDFLAGS) -s -w $(GOLDFLAGS)' -o bin/$@
endif

# Remove Go build artefacts, the coverage report and the built binary.
# DIST is not assigned anywhere in this Makefile; it expands to nothing unless
# the caller supplies it.
# Phony: a file named `clean` must not suppress the run.
.PHONY: clean
clean:
	$(GO) clean -x -i ./...
	rm -rf coverage.txt $(EXECUTABLE) $(DIST) bin/$(EXECUTABLE)

# Delete the whisper.cpp CMake build tree so `dependency` rebuilds it from scratch.
# Phony: this is a command, not a file to be produced.
.PHONY: clean-whisper
clean-whisper:
	rm -rf "$(WHISPER_BUILD)"

# Print the version string that gets stamped into the binary.
# Phony: a file named `version` must not suppress the output.
.PHONY: version
version:
	@echo $(VERSION)
