refactor(server): split gtfs into its own module

This commit is contained in:
Cilly Leang 2026-03-31 23:12:54 +11:00
parent aad5ae4024
commit 0181497420
Signed by: cilly
GPG key ID: 6500251E087653C9
18 changed files with 241 additions and 132 deletions

View file

@ -12,8 +12,17 @@ application {
applicationDefaultJvmArgs = listOf("-Dio.ktor.development=${extra["io.ktor.development"] ?: "false"}")
}
// Extra Kotlin compiler configuration for this module.
kotlin {
compilerOptions {
// Appends the -Xexplicit-backing-fields flag to the compiler command line.
// NOTE(review): -X flags are non-stable compiler switches — re-check on Kotlin upgrades.
freeCompilerArgs.add("-Xexplicit-backing-fields")
}
}
dependencies {
implementation(projects.shared)
implementation(projects.server.gtfs)
implementation(projects.server.gtfsRt)
implementation(libs.logback)
implementation(libs.koin.core)
implementation(libs.koin.ktor)

View file

@ -0,0 +1,20 @@
// Module build configuration: Kotlin/JVM with the kotlinx.serialization compiler plugin.
plugins {
    alias(libs.plugins.kotlinJvm)
    alias(libs.plugins.kotlinSerialization)
}

kotlin {
    compilerOptions {
        // Opt in to kotlin.time.ExperimentalTime module-wide and pass the
        // explicit-backing-fields switch; order of the two flags is preserved.
        freeCompilerArgs.addAll(
            "-opt-in=kotlin.time.ExperimentalTime",
            "-Xexplicit-backing-fields",
        )
    }
}

dependencies {
    // Shared project code.
    implementation(projects.shared)

    // CSV decoding and date/time types.
    implementation(libs.kotlinx.serialization.csv)
    implementation(libs.kotlinx.datetime)

    // Ktor HTTP client with the OkHttp engine.
    implementation(libs.ktor.client.contentnegotiation)
    implementation(libs.ktor.client.core)
    implementation(libs.ktor.client.okhttp)
}

View file

@ -9,6 +9,9 @@ import io.ktor.client.statement.bodyAsChannel
import io.ktor.util.cio.writeChannel
import io.ktor.util.logging.Logger
import io.ktor.utils.io.copyAndClose
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.flow
import kotlinx.coroutines.flow.onCompletion
import kotlinx.datetime.DayOfWeek
import kotlinx.datetime.LocalDate
import kotlinx.serialization.decodeFromString
@ -16,14 +19,12 @@ import kotlinx.serialization.modules.EmptySerializersModule
import kotlinx.serialization.serializer
import moe.lava.banksia.Constants
import moe.lava.banksia.model.Route
import moe.lava.banksia.model.RouteType
import moe.lava.banksia.model.Service
import moe.lava.banksia.model.Shape
import moe.lava.banksia.model.Stop
import moe.lava.banksia.model.StopTime
import moe.lava.banksia.model.Trip
import moe.lava.banksia.room.Database
import moe.lava.banksia.room.converter.RouteTypeConverter
import moe.lava.banksia.room.entity.asEntity
import moe.lava.banksia.server.gtfs.structures.GtfsRoute
import moe.lava.banksia.server.gtfs.structures.GtfsService
import moe.lava.banksia.server.gtfs.structures.GtfsShape
@ -33,19 +34,26 @@ import moe.lava.banksia.server.gtfs.structures.GtfsTrip
import moe.lava.banksia.util.Point
import java.io.File
import java.util.zip.ZipFile
import kotlin.time.Clock
import kotlin.time.ExperimentalTime
class GtfsHandler(
/**
 * One batch of parsed records, all of a single kind.
 *
 * Each subclass wraps a list of one model type, so a consumer can branch on the
 * subclass to decide where the batch goes.
 */
sealed class GtfsData {
/** A batch of [Route] records. */
data class RouteChunk(val routes: List<Route>) : GtfsData()
/** A batch of [Service] records. */
data class ServiceChunk(val services: List<Service>) : GtfsData()
/** A batch of [Shape] records. */
data class ShapeChunk(val shapes: List<Shape>) : GtfsData()
/** A batch of [Stop] records. */
data class StopChunk(val stops: List<Stop>) : GtfsData()
/** A batch of [StopTime] records. */
data class StopTimeChunk(val stopTimes: List<StopTime>) : GtfsData()
/** A batch of [Trip] records. */
data class TripChunk(val trips: List<Trip>) : GtfsData()
}
class GtfsParser(
private val log: Logger,
private val client: HttpClient,
private val db: Database,
) {
private val csv = CsvFormat(StringDeferringConfig(EmptySerializersModule()))
private val datasetPath = File("/tmp/banksia", "dataset.zip")
@OptIn(ExperimentalTime::class)
suspend fun update(datasetUrl: String, date: Long? = null) {
suspend fun update(datasetUrl: String): Flow<GtfsData> {
val parentDir = datasetPath.parentFile
@Suppress("SimplifyBooleanWithConstants", "KotlinConstantConditions")
if (parentDir.exists() && !Constants.devMode)
@ -74,39 +82,56 @@ class GtfsHandler(
extractAll(datasetPath)
}
addRoutes(files)
addStops(files)
addShapes(files)
val services = addServices(files)
val trips = addTrips(files, services.associateBy { it.id })
addStopTimes(files, trips.associateBy { it.id })
log.info("parsing...")
return parse(files)
.onCompletion {
@Suppress("KotlinConstantConditions")
if (!Constants.devMode) {
parentDir.deleteRecursively()
}
updateMetadata(date ?: Clock.System.now().epochSeconds)
@Suppress("KotlinConstantConditions")
if (!Constants.devMode) {
parentDir.deleteRecursively()
}
log.info("done!")
log.info("done!")
}
}
private suspend fun updateMetadata(date: Long) {
val dao = db.versionMetadataDao
log.info("updating metadata...")
dao.update(date, listOf("routes", "stops", "shapes", "trips", "stop_times"))
}
private suspend fun addRoutes(files: List<File>) {
val dao = db.routeDao
log.info("parsing routes...")
val routes = files
private fun parse(files: List<File>) = flow {
files
.filter { it.name == "routes.txt" }
.flatMap { fd -> parseRoutes(fd) }
.forEach { emit(GtfsData.RouteChunk(parseRoutes(it))) }
log.info("inserting routes...")
dao.deleteAll()
dao.insertAll(*routes.map { it.asEntity() }.toTypedArray())
files
.filter { it.name == "stops.txt" }
.forEach { emit(GtfsData.StopChunk(parseStops(it))) }
files
.filter { it.name == "shapes.txt" }
.forEach { emit(GtfsData.ShapeChunk(parseShapes(it))) }
val services = files
.filter { it.name == "calendar.txt" }
.flatMap { fd ->
parseServices(fd)
.also { emit(GtfsData.ServiceChunk(it)) }
}
.associateBy { it.id }
val trips = files
.filter { it.name == "trips.txt" }
.flatMap { fd ->
parseTrips(fd, services)
.also { emit(GtfsData.TripChunk(it)) }
}
.associateBy { it.id }
files
.filter { it.name == "stop_times.txt" }
.forEach { fd ->
log.info("parsing stop times for ${fd.parent}...")
parseStopTimes(fd, trips) { seq ->
seq.chunked(1000000)
.forEach { emit(GtfsData.StopTimeChunk(it)) }
}
}
}
private fun parseRoutes(fd: File) =
@ -114,24 +139,12 @@ class GtfsHandler(
.map { with(it) {
Route(
id = route_id,
type = RouteTypeConverter.from(fd.parentFile.name.toInt()),
type = RouteType.from(fd.parentFile.name.toInt()),
number = route_short_name,
name = route_long_name,
)
} }
private suspend fun addShapes(files: List<File>) {
val dao = db.shapeDao
log.info("parsing shapes...")
val shapes = files
.filter { it.name == "shapes.txt" }
.flatMap { fd -> parseShapes(fd) }
log.info("inserting shapes...")
dao.deleteAll()
dao.insertAll(*shapes.map { it.asEntity() }.toTypedArray())
}
private fun parseShapes(fd: File) =
fd.parseCsv<GtfsShape>()
.groupBy { it.shape_id }
@ -143,29 +156,6 @@ class GtfsHandler(
Shape(id, points)
}
private suspend fun addStops(files: List<File>) {
val dao = db.stopDao
log.info("parsing stops...")
val stops = files
.filter { it.name == "stops.txt" }
.flatMap { fd -> parseStops(fd) }
log.info("inserting stops...")
dao.deleteAll()
stops
.groupBy { it.id }
.forEach { (id, gstops) ->
if (gstops.size > 1) {
if (gstops.withIndex().any { (i, stop) -> i != 0 && stop != gstops[i - 1] }) {
gstops.forEach {
log.info("duplicate $id: $it")
}
}
}
}
dao.insertOrReplaceAll(*stops.map { it.asEntity() }.toTypedArray())
}
private fun parseStops(fd: File) =
fd.parseCsv<GtfsStop>()
.map { with(it) {
@ -180,26 +170,6 @@ class GtfsHandler(
)
} }
private suspend fun addStopTimes(files: List<File>, trips: Map<String, Trip>) {
val dao = db.stopTimeDao
dao.deleteAll()
log.info("parsing stop times...")
files
.filter { it.name == "stop_times.txt" }
.forEach { fd ->
log.info("parsing stop times for ${fd.parent}...")
parseStopTimes(fd, trips) { seq ->
seq.chunked(1000000)
.forEach { queue ->
log.info("converting stop times (${queue.size}) for ${fd.parent}...")
val conv = queue.map { it.asEntity() }.toTypedArray()
log.info("inserting stop times (${conv.size}) for ${fd.parent}...")
dao.insertOrReplaceAll(*conv)
}
}
}
}
private inline fun parseStopTimes(fd: File, trips: Map<String, Trip>, block: (Sequence<StopTime>) -> Unit) =
fd.parseCsvSequence<GtfsStopTime> { seq ->
seq
@ -217,20 +187,6 @@ class GtfsHandler(
.let { block(it) }
}
private suspend fun addServices(files: List<File>): List<Service> {
val dao = db.serviceDao
log.info("parsing services...")
val services = files
.filter { it.name == "calendar.txt" }
.flatMap { fd -> parseServices(fd) }
log.info("inserting services...")
dao.deleteAll()
dao.insertOrReplaceAll(*services.map { it.asEntity() }.toTypedArray())
return services
}
private fun parseServices(fd: File) =
fd.parseCsv<GtfsService>()
.map { with(it) {
@ -251,20 +207,6 @@ class GtfsHandler(
)
} }
private suspend fun addTrips(files: List<File>, services: Map<String, Service>): List<Trip> {
val dao = db.tripDao
log.info("parsing trips...")
val trips = files
.filter { it.name == "trips.txt" }
.flatMap { fd -> parseTrips(fd, services) }
log.info("inserting trips...")
dao.deleteAll()
dao.insertOrReplaceAll(*trips.map { it.asEntity() }.toTypedArray())
return trips
}
private fun parseTrips(fd: File, services: Map<String, Service>) =
fd.parseCsv<GtfsTrip>()
.map { with(it) {

View file

@ -4,7 +4,7 @@ import kotlinx.serialization.Serializable
@Suppress("PropertyName")
@Serializable
data class GtfsRoute(
internal data class GtfsRoute(
val route_id: String,
val agency_id: String,
val route_short_name: String,

View file

@ -4,7 +4,7 @@ import kotlinx.serialization.Serializable
@Suppress("PropertyName")
@Serializable
data class GtfsService(
internal data class GtfsService(
val service_id: String,
val monday: Int,
val tuesday: Int,

View file

@ -4,7 +4,7 @@ import kotlinx.serialization.Serializable
@Suppress("PropertyName")
@Serializable
data class GtfsShape(
internal data class GtfsShape(
val shape_id: String,
val shape_pt_lat: Double,
val shape_pt_lon: Double,

View file

@ -4,7 +4,7 @@ import kotlinx.serialization.Serializable
@Suppress("PropertyName")
@Serializable
data class GtfsStop(
internal data class GtfsStop(
val stop_id: String,
val stop_name: String,
val stop_lat: Double,

View file

@ -5,7 +5,7 @@ import moe.lava.banksia.model.FutureTime
@Suppress("PropertyName")
@Serializable
data class GtfsStopTime(
internal data class GtfsStopTime(
val trip_id: String,
val arrival_time: String,
val departure_time: String,

View file

@ -4,7 +4,7 @@ import kotlinx.serialization.Serializable
@Suppress("PropertyName")
@Serializable
data class GtfsTrip(
internal data class GtfsTrip(
val route_id: String,
val service_id: String,
val trip_id: String,

View file

@ -0,0 +1,31 @@
// Module build configuration: Kotlin/JVM, kotlinx.serialization and the Wire plugin.
plugins {
alias(libs.plugins.kotlinJvm)
alias(libs.plugins.kotlinSerialization)
alias(libs.plugins.wire)
}
kotlin {
compilerOptions {
// Opts the whole module in to APIs marked kotlin.time.ExperimentalTime.
freeCompilerArgs.add("-opt-in=kotlin.time.ExperimentalTime")
}
}
dependencies {
// Shared project code.
implementation(projects.shared)
implementation(libs.okio)
implementation(libs.koin.core)
// Ktor HTTP client with JSON content negotiation.
implementation(libs.ktor.client.core)
implementation(libs.ktor.client.contentnegotiation)
implementation(libs.ktor.serialization.kotlinx.json)
implementation(libs.kotlinx.coroutines.core)
implementation(libs.kotlinx.datetime)
// kotlinx.serialization formats: JSON and protobuf.
implementation(libs.kotlinx.serialization.json)
implementation(libs.kotlinx.serialization.protobuf)
}
// Wire plugin configuration.
wire {
sourcePath {
// Directory the plugin reads .proto files from.
// NOTE(review): the module applies the JVM plugin, yet this path sits under
// `commonMain` — confirm the proto files actually live here.
srcDir("src/commonMain/proto")
}
// Kotlin output target with no options overridden.
kotlin {}
}

View file

@ -26,7 +26,6 @@ import moe.lava.banksia.room.dao.StopDao
import moe.lava.banksia.room.dao.StopTimeDao
import moe.lava.banksia.room.dao.VersionMetadataDao
import moe.lava.banksia.server.di.ServerModules
import moe.lava.banksia.server.gtfs.GtfsHandler
import moe.lava.banksia.server.gtfsr.GtfsrService
import moe.lava.banksia.util.serialise
import org.koin.dsl.module
@ -67,8 +66,8 @@ fun Application.module() {
?: "https://opendata.transport.vic.gov.au/dataset/3f4e292e-7f8a-4ffe-831f-1953be0fe448/resource/${datasetUuid}/download/gtfs.zip"
call.respondText("received")
launch(context = Dispatchers.IO) {
val handler by inject<GtfsHandler>()
handler.update(datasetUrl)
val importer by inject<GtfsImporter>()
importer.import(datasetUrl)
}
}

View file

@ -0,0 +1,95 @@
package moe.lava.banksia.server
import io.ktor.util.logging.Logger
import moe.lava.banksia.model.Route
import moe.lava.banksia.model.Service
import moe.lava.banksia.model.Shape
import moe.lava.banksia.model.Stop
import moe.lava.banksia.model.StopTime
import moe.lava.banksia.model.Trip
import moe.lava.banksia.room.Database
import moe.lava.banksia.room.entity.asEntity
import moe.lava.banksia.server.gtfs.GtfsData
import moe.lava.banksia.server.gtfs.GtfsParser
import kotlin.time.Clock
/**
 * Writes the chunks produced by [GtfsParser] into the Room [Database].
 *
 * The parser only emits model objects; this class converts them to entities
 * and hands them to the matching DAO.
 */
class GtfsImporter(
    private val parser: GtfsParser,
    private val database: Database,
    private val log: Logger,
) {
    /**
     * Empties the route, service, shape, stop, stop-time and trip tables, then
     * collects the flow returned by [GtfsParser.update] for [url] and stores
     * each chunk as it arrives. Version metadata is written once collection ends.
     *
     * NOTE(review): the tables are cleared before the parser is invoked and nothing
     * in this class restores the rows if collection fails — confirm that is acceptable.
     *
     * @param url dataset location, forwarded to the parser unchanged.
     * @param date value recorded in the version metadata; defaults to the current epoch seconds.
     */
    suspend fun import(url: String, date: Long = Clock.System.now().epochSeconds) {
        clearTables()
        parser.update(url).collect { data -> store(data) }
        updateMetadata(date)
    }

    /** Deletes every row from the six imported tables, in a fixed order. */
    private suspend fun clearTables() {
        with(database) {
            routeDao.deleteAll()
            serviceDao.deleteAll()
            shapeDao.deleteAll()
            stopDao.deleteAll()
            stopTimeDao.deleteAll()
            tripDao.deleteAll()
        }
    }

    /** Routes one parsed chunk to the insert routine for its record kind. */
    private suspend fun store(data: GtfsData) = when (data) {
        is GtfsData.RouteChunk -> addRoutes(data.routes)
        is GtfsData.ServiceChunk -> addServices(data.services)
        is GtfsData.ShapeChunk -> addShapes(data.shapes)
        is GtfsData.StopChunk -> addStops(data.stops)
        is GtfsData.StopTimeChunk -> addStopTimes(data.stopTimes)
        is GtfsData.TripChunk -> addTrips(data.trips)
    }

    /** Records [date] against the listed table names in the version metadata. */
    private suspend fun updateMetadata(date: Long) {
        val tables = listOf("routes", "stops", "shapes", "trips", "stop_times")
        log.info("updating metadata...")
        database.versionMetadataDao.update(date, tables)
    }

    /** Inserts (or replaces) the given routes. */
    private suspend fun addRoutes(routes: List<Route>) {
        log.info("inserting routes...")
        val entities = routes.map { route -> route.asEntity() }.toTypedArray()
        database.routeDao.insertOrReplaceAll(*entities)
    }

    /** Inserts (or replaces) the given services. */
    private suspend fun addServices(services: List<Service>) {
        log.info("inserting services...")
        val entities = services.map { service -> service.asEntity() }.toTypedArray()
        database.serviceDao.insertOrReplaceAll(*entities)
    }

    /** Inserts (or replaces) the given shapes. */
    private suspend fun addShapes(shapes: List<Shape>) {
        log.info("inserting shapes...")
        val entities = shapes.map { shape -> shape.asEntity() }.toTypedArray()
        database.shapeDao.insertOrReplaceAll(*entities)
    }

    /**
     * Inserts (or replaces) the given stops.
     *
     * Before inserting, stops sharing an id are compared pairwise in list order;
     * if any neighbouring pair differs, every stop in that group is logged as a warning.
     */
    private suspend fun addStops(stops: List<Stop>) {
        log.info("inserting stops...")
        for ((id, sameId) in stops.groupBy { it.id }) {
            val conflicting = sameId.size > 1 &&
                sameId.zipWithNext().any { (previous, current) -> current != previous }
            if (conflicting) {
                for (stop in sameId) {
                    log.warn("duplicate $id: $stop")
                }
            }
        }
        val entities = stops.map { stop -> stop.asEntity() }.toTypedArray()
        database.stopDao.insertOrReplaceAll(*entities)
    }

    /** Inserts (or replaces) the given stop times, logging the batch size. */
    private suspend fun addStopTimes(stopTimes: List<StopTime>) {
        log.info("inserting ${stopTimes.size} stoptimes...")
        val entities = stopTimes.map { stopTime -> stopTime.asEntity() }.toTypedArray()
        database.stopTimeDao.insertOrReplaceAll(*entities)
    }

    /** Inserts (or replaces) the given trips, logging the batch size. */
    private suspend fun addTrips(trips: List<Trip>) {
        log.info("inserting ${trips.size} trips...")
        val entities = trips.map { trip -> trip.asEntity() }.toTypedArray()
        database.tripDao.insertOrReplaceAll(*entities)
    }
}

View file

@ -1,13 +1,16 @@
package moe.lava.banksia.server.di
import io.ktor.client.HttpClient
import moe.lava.banksia.server.gtfs.GtfsHandler
import moe.lava.banksia.server.GtfsImporter
import moe.lava.banksia.server.gtfs.GtfsParser
import moe.lava.banksia.server.gtfsr.GtfsrService
import org.koin.core.module.dsl.singleOf
import org.koin.dsl.module
val ServerModules = module {
single { HttpClient() }
singleOf(::GtfsHandler)
singleOf(::GtfsParser)
singleOf(::GtfsrService)
singleOf(::GtfsImporter)
}

View file

@ -34,6 +34,8 @@ dependencyResolutionManagement {
include(":androidApp")
include(":client")
include(":server")
include(":server:gtfs")
include(":server:gtfs_rt")
include(":shared")
include(":ui")
include(":ui:maps")

View file

@ -13,4 +13,8 @@ enum class RouteType(val value: Int) {
SkyBus(11),
Interstate(10),
;
companion object {
    /**
     * Resolves a numeric code to the [RouteType] entry that declares it.
     *
     * @param value numeric code to look up; compared against each entry's `value` property.
     * @return the first entry whose `value` equals the given code.
     * @throws NoSuchElementException if no entry declares the code; the message
     *   names the rejected code so the failing input can be identified from a log.
     */
    fun from(value: Int): RouteType =
        RouteType.entries.firstOrNull { it.value == value }
            ?: throw NoSuchElementException("No RouteType with value $value")
}
}

View file

@ -5,7 +5,7 @@ import moe.lava.banksia.model.RouteType
object RouteTypeConverter {
@TypeConverter
fun from(value: Int) = RouteType.entries.first { it.value == value }
fun from(value: Int) = RouteType.from(value)
@TypeConverter
fun to(routeType: RouteType) = routeType.value

View file

@ -3,6 +3,7 @@ package moe.lava.banksia.room.dao
import androidx.room.Dao
import androidx.room.Delete
import androidx.room.Insert
import androidx.room.OnConflictStrategy.Companion.REPLACE
import androidx.room.Query
import moe.lava.banksia.room.entity.ShapeEntity
@ -14,6 +15,9 @@ interface ShapeDao {
@Insert
suspend fun insertAll(vararg shapes: ShapeEntity)
@Insert(onConflict = REPLACE)
suspend fun insertOrReplaceAll(vararg shapes: ShapeEntity)
@Delete
suspend fun delete(shape: ShapeEntity)

View file

@ -49,7 +49,7 @@ sealed class MapScreenEvent {
data class SearchUpdate(val text: String) : MapScreenEvent()
}
data class InternalState(
private data class InternalState(
val route: String? = null,
val stop: String? = null,
val run: String? = null,