From 0d4fd6f214ffbb4e07fb4e5575baa1d751e09366 Mon Sep 17 00:00:00 2001 From: Dmitry Popov Date: Thu, 13 Nov 2025 15:48:56 +0100 Subject: [PATCH] fix(core): fixed maps start/stop logic, added server downtime period support --- lib/wanderer_app/character/tracker_pool.ex | 2 +- .../character/transactions_tracker_impl.ex | 24 +- lib/wanderer_app/map/map_pool.ex | 291 +++++++++++++-- .../map/map_pool_dynamic_supervisor.ex | 75 +++- lib/wanderer_app/map/map_pool_supervisor.ex | 3 +- lib/wanderer_app/map/map_reconciler.ex | 280 ++++++++++++++ .../server/server_status_tracker.ex | 73 +++- test/unit/map/map_pool_test.exs | 343 ++++++++++++++++++ 8 files changed, 1032 insertions(+), 59 deletions(-) create mode 100644 lib/wanderer_app/map/map_reconciler.ex create mode 100644 test/unit/map/map_pool_test.exs diff --git a/lib/wanderer_app/character/tracker_pool.ex b/lib/wanderer_app/character/tracker_pool.ex index 10e534ac..603ab0c4 100644 --- a/lib/wanderer_app/character/tracker_pool.ex +++ b/lib/wanderer_app/character/tracker_pool.ex @@ -8,7 +8,7 @@ defmodule WandererApp.Character.TrackerPool do :tracked_ids, :uuid, :characters, - server_online: true + server_online: false ] @name __MODULE__ diff --git a/lib/wanderer_app/character/transactions_tracker_impl.ex b/lib/wanderer_app/character/transactions_tracker_impl.ex index 877175dc..c2376d3b 100644 --- a/lib/wanderer_app/character/transactions_tracker_impl.ex +++ b/lib/wanderer_app/character/transactions_tracker_impl.ex @@ -12,7 +12,7 @@ defmodule WandererApp.Character.TransactionsTracker.Impl do total_balance: 0, transactions: [], retries: 5, - server_online: true, + server_online: false, status: :started ] @@ -75,7 +75,7 @@ defmodule WandererApp.Character.TransactionsTracker.Impl do def handle_event( :update_corp_wallets, - %{character: character} = state + %{character: character, server_online: true} = state ) do Process.send_after(self(), :update_corp_wallets, @update_interval) @@ -88,26 +88,26 @@ defmodule WandererApp.Character.TransactionsTracker.Impl do :update_corp_wallets, state ) do - Process.send_after(self(), :update_corp_wallets, :timer.seconds(15)) + Process.send_after(self(), :update_corp_wallets, @update_interval) state end def handle_event( :check_wallets, - %{wallets: []} = state + %{character: character, wallets: wallets, server_online: true} = state ) do - Process.send_after(self(), :check_wallets, :timer.seconds(5)) + Process.send_after(self(), :check_wallets, @update_interval) - state - end - - def handle_event( - :check_wallets, - %{character: character, wallets: wallets} = state - ) do check_wallets(wallets, character) + state + end + + def handle_event( + :check_wallets, + state + ) do Process.send_after(self(), :check_wallets, @update_interval) state diff --git a/lib/wanderer_app/map/map_pool.ex b/lib/wanderer_app/map/map_pool.ex index 8688d6f6..dc0c135a 100644 --- a/lib/wanderer_app/map/map_pool.ex +++ b/lib/wanderer_app/map/map_pool.ex @@ -64,11 +64,21 @@ defmodule WandererApp.Map.MapPool do def handle_continue({:start, map_ids}, state) do Logger.info("#{@name} started") - map_ids - |> Enum.each(fn map_id -> - GenServer.cast(self(), {:start_map, map_id}) - end) + # Start maps synchronously and accumulate state changes + new_state = + map_ids + |> Enum.reduce(state, fn map_id, current_state -> + case do_start_map(map_id, current_state) do + {:ok, updated_state} -> + updated_state + {:error, reason} -> + Logger.error("[Map Pool] Failed to start map #{map_id}: #{reason}") + current_state + end + end) + + # Schedule periodic tasks Process.send_after(self(), :backup_state, @backup_state_timeout) Process.send_after(self(), :cleanup_systems, 15_000) Process.send_after(self(), :cleanup_characters, @characters_cleanup_timeout) @@ -77,28 +87,21 @@ defmodule WandererApp.Map.MapPool do # Start message queue monitoring Process.send_after(self(), :monitor_message_queue, :timer.seconds(30)) - {:noreply, state} + {:noreply, new_state} end @impl true def handle_cast(:stop, state), do: {:stop, :normal, state} @impl true - def handle_cast({:start_map, map_id}, %{map_ids: map_ids, uuid: uuid} = state) do - if map_id not in map_ids do - Registry.update_value(@unique_registry, Module.concat(__MODULE__, uuid), fn r_map_ids -> - [map_id | r_map_ids] - end) + def handle_cast({:start_map, map_id}, state) do + case do_start_map(map_id, state) do + {:ok, new_state} -> + {:noreply, new_state} - Cachex.put(@cache, map_id, uuid) - - map_id - |> WandererApp.Map.get_map_state!() - |> Server.Impl.start_map() - - {:noreply, %{state | map_ids: [map_id | map_ids]}} - else - {:noreply, state} + {:error, _reason} -> + # Error already logged in do_start_map + {:noreply, state} end end @@ -119,6 +122,215 @@ defmodule WandererApp.Map.MapPool do {:noreply, %{state | map_ids: map_ids |> Enum.reject(fn id -> id == map_id end)}} end + @impl true + def handle_call( + {:stop_map, map_id}, + _from, + state + ) do + case do_stop_map(map_id, state) do + {:ok, new_state} -> + {:reply, :ok, new_state} + + {:error, reason} -> + {:reply, {:error, reason}, state} + end + end + + defp do_start_map(map_id, %{map_ids: map_ids, uuid: uuid} = state) do + if map_id in map_ids do + # Map already started + {:ok, state} + else + # Track what operations succeeded for potential rollback + completed_operations = [] + + try do + # Step 1: Update Registry (most critical, do first) + registry_result = + Registry.update_value(@unique_registry, Module.concat(__MODULE__, uuid), fn r_map_ids -> + [map_id | r_map_ids] + end) + + completed_operations = [:registry | completed_operations] + + case registry_result do + {new_value, _old_value} when is_list(new_value) -> + :ok + + :error -> + raise "Failed to update registry for pool #{uuid}" + end + + # Step 2: Add to cache + case Cachex.put(@cache, map_id, uuid) do + {:ok, _} -> + completed_operations = [:cache | completed_operations] + + {:error, reason} -> + raise "Failed to add to cache: #{inspect(reason)}" + end + + # Step 3: Start the map server + map_id + |> WandererApp.Map.get_map_state!() + |> Server.Impl.start_map() + + completed_operations = [:map_server | completed_operations] + + # Step 4: Update GenServer state (last, as this is in-memory and fast) + new_state = %{state | map_ids: [map_id | map_ids]} + + Logger.debug("[Map Pool] Successfully started map #{map_id} in pool #{uuid}") + {:ok, new_state} + rescue + e -> + Logger.error(""" + [Map Pool] Failed to start map #{map_id} (completed: #{inspect(completed_operations)}): #{Exception.message(e)} + #{Exception.format_stacktrace(__STACKTRACE__)} + """) + + # Attempt rollback of completed operations + rollback_start_map_operations(map_id, uuid, completed_operations) + + {:error, Exception.message(e)} + end + end + end + + defp rollback_start_map_operations(map_id, uuid, completed_operations) do + Logger.warning("[Map Pool] Attempting to rollback start_map operations for #{map_id}") + + # Rollback in reverse order + if :map_server in completed_operations do + Logger.debug("[Map Pool] Rollback: Stopping map server for #{map_id}") + + try do + Server.Impl.stop_map(map_id) + rescue + e -> + Logger.error("[Map Pool] Rollback failed to stop map server: #{Exception.message(e)}") + end + end + + if :cache in completed_operations do + Logger.debug("[Map Pool] Rollback: Removing #{map_id} from cache") + + case Cachex.del(@cache, map_id) do + {:ok, _} -> + :ok + + {:error, reason} -> + Logger.error("[Map Pool] Rollback failed for cache: #{inspect(reason)}") + end + end + + if :registry in completed_operations do + Logger.debug("[Map Pool] Rollback: Removing #{map_id} from registry") + + try do + Registry.update_value(@unique_registry, Module.concat(__MODULE__, uuid), fn r_map_ids -> + r_map_ids |> Enum.reject(fn id -> id == map_id end) + end) + rescue + e -> + Logger.error("[Map Pool] Rollback failed for registry: #{Exception.message(e)}") + end + end + end + + defp do_stop_map(map_id, %{map_ids: map_ids, uuid: uuid} = state) do + # Track what operations succeeded for potential rollback + completed_operations = [] + + try do + # Step 1: Update Registry (most critical, do first) + registry_result = + Registry.update_value(@unique_registry, Module.concat(__MODULE__, uuid), fn r_map_ids -> + r_map_ids |> Enum.reject(fn id -> id == map_id end) + end) + + completed_operations = [:registry | completed_operations] + + case registry_result do + {new_value, _old_value} when is_list(new_value) -> + :ok + + :error -> + raise "Failed to update registry for pool #{uuid}" + end + + # Step 2: Delete from cache + case Cachex.del(@cache, map_id) do + {:ok, _} -> + completed_operations = [:cache | completed_operations] + + {:error, reason} -> + raise "Failed to delete from cache: #{inspect(reason)}" + end + + # Step 3: Stop the map server (clean up all map resources) + map_id + |> Server.Impl.stop_map() + + completed_operations = [:map_server | completed_operations] + + # Step 4: Update GenServer state (last, as this is in-memory and fast) + new_state = %{state | map_ids: map_ids |> Enum.reject(fn id -> id == map_id end)} + + Logger.debug("[Map Pool] Successfully stopped map #{map_id} from pool #{uuid}") + {:ok, new_state} + rescue + e -> + Logger.error(""" + [Map Pool] Failed to stop map #{map_id} (completed: #{inspect(completed_operations)}): #{Exception.message(e)} + #{Exception.format_stacktrace(__STACKTRACE__)} + """) + + # Attempt rollback of completed operations + rollback_stop_map_operations(map_id, uuid, completed_operations) + + {:error, Exception.message(e)} + end + end + + defp rollback_stop_map_operations(map_id, uuid, completed_operations) do + Logger.warning("[Map Pool] Attempting to rollback stop_map operations for #{map_id}") + + # Rollback in reverse order + if :cache in completed_operations do + Logger.debug("[Map Pool] Rollback: Re-adding #{map_id} to cache") + + case Cachex.put(@cache, map_id, uuid) do + {:ok, _} -> + :ok + + {:error, reason} -> + Logger.error("[Map Pool] Rollback failed for cache: #{inspect(reason)}") + end + end + + if :registry in completed_operations do + Logger.debug("[Map Pool] Rollback: Re-adding #{map_id} to registry") + + try do + Registry.update_value(@unique_registry, Module.concat(__MODULE__, uuid), fn r_map_ids -> + if map_id in r_map_ids do + r_map_ids + else + [map_id | r_map_ids] + end + end) + rescue + e -> + Logger.error("[Map Pool] Rollback failed for registry: #{Exception.message(e)}") + end + end + + # Note: We don't rollback map_server stop as Server.Impl.stop_map() is idempotent + # and the cleanup operations are safe to leave in a "stopped" state + end + @impl true def handle_call(:error, _, state), do: {:stop, :error, :ok, state} @@ -231,25 +443,38 @@ defmodule WandererApp.Map.MapPool do Process.send_after(self(), :garbage_collect, @garbage_collection_interval) try do - map_ids - |> Enum.each(fn map_id -> - # presence_character_ids = - # WandererApp.Cache.lookup!("map_#{map_id}:presence_character_ids", []) + # Process each map and accumulate state changes + new_state = + map_ids + |> Enum.reduce(state, fn map_id, current_state -> + presence_character_ids = + WandererApp.Cache.lookup!("map_#{map_id}:presence_character_ids", []) - # if presence_character_ids |> Enum.empty?() do - Logger.info( - "#{uuid}: No more characters present on: #{map_id}, shutting down map server..." - ) + if presence_character_ids |> Enum.empty?() do + Logger.info( + "#{uuid}: No more characters present on: #{map_id}, shutting down map server..." + ) - GenServer.cast(self(), {:stop_map, map_id}) - # end - end) + case do_stop_map(map_id, current_state) do + {:ok, updated_state} -> + Logger.debug("#{uuid}: Successfully stopped map #{map_id}") + updated_state + + {:error, reason} -> + Logger.error("#{uuid}: Failed to stop map #{map_id}: #{reason}") + current_state + end + else + current_state + end + end) + + {:noreply, new_state} rescue e -> - Logger.error(Exception.message(e)) + Logger.error("#{uuid}: Garbage collection error: #{Exception.message(e)}") + {:noreply, state} end - - {:noreply, state} end @impl true diff --git a/lib/wanderer_app/map/map_pool_dynamic_supervisor.ex b/lib/wanderer_app/map/map_pool_dynamic_supervisor.ex index c2eeb16b..93deed26 100644 --- a/lib/wanderer_app/map/map_pool_dynamic_supervisor.ex +++ b/lib/wanderer_app/map/map_pool_dynamic_supervisor.ex @@ -36,17 +36,78 @@ defmodule WandererApp.Map.MapPoolDynamicSupervisor do end def stop_map(map_id) do - {:ok, pool_uuid} = Cachex.get(@cache, map_id) + case Cachex.get(@cache, map_id) do + {:ok, nil} -> + # Cache miss - try to find the pool by scanning the registry + Logger.warning( + "Cache miss for map #{map_id}, scanning registry for pool containing this map" + ) - case Registry.lookup( - @unique_registry, - Module.concat(WandererApp.Map.MapPool, pool_uuid) - ) do + find_pool_by_scanning_registry(map_id) + + {:ok, pool_uuid} -> + # Cache hit - use the pool_uuid to lookup the pool + case Registry.lookup( + @unique_registry, + Module.concat(WandererApp.Map.MapPool, pool_uuid) + ) do + [] -> + Logger.warning( + "Pool with UUID #{pool_uuid} not found in registry for map #{map_id}, scanning registry" + ) + + find_pool_by_scanning_registry(map_id) + + [{pool_pid, _}] -> + GenServer.cast(pool_pid, {:stop_map, map_id}) + end + + {:error, reason} -> + Logger.error("Failed to lookup map #{map_id} in cache: #{inspect(reason)}") + :ok + end + end + + defp find_pool_by_scanning_registry(map_id) do + case Registry.lookup(@registry, WandererApp.Map.MapPool) do [] -> + Logger.debug("No map pools found in registry for map #{map_id}") :ok - [{pool_pid, _}] -> - GenServer.cast(pool_pid, {:stop_map, map_id}) + pools -> + # Scan all pools to find the one containing this map_id + found_pool = + Enum.find_value(pools, fn {_pid, uuid} -> + case Registry.lookup( + @unique_registry, + Module.concat(WandererApp.Map.MapPool, uuid) + ) do + [{pool_pid, map_ids}] -> + if map_id in map_ids do + {pool_pid, uuid} + else + nil + end + + _ -> + nil + end + end) + + case found_pool do + {pool_pid, pool_uuid} -> + Logger.info( + "Found map #{map_id} in pool #{pool_uuid} via registry scan, updating cache" + ) + + # Update the cache to fix the inconsistency + Cachex.put(@cache, map_id, pool_uuid) + GenServer.cast(pool_pid, {:stop_map, map_id}) + + nil -> + Logger.debug("Map #{map_id} not found in any pool registry") + :ok + end end end diff --git a/lib/wanderer_app/map/map_pool_supervisor.ex b/lib/wanderer_app/map/map_pool_supervisor.ex index b3d0b170..cfa284dd 100644 --- a/lib/wanderer_app/map/map_pool_supervisor.ex +++ b/lib/wanderer_app/map/map_pool_supervisor.ex @@ -14,7 +14,8 @@ defmodule WandererApp.Map.MapPoolSupervisor do children = [ {Registry, [keys: :unique, name: @unique_registry]}, {Registry, [keys: :duplicate, name: @registry]}, - {WandererApp.Map.MapPoolDynamicSupervisor, []} + {WandererApp.Map.MapPoolDynamicSupervisor, []}, + {WandererApp.Map.Reconciler, []} ] Supervisor.init(children, strategy: :rest_for_one, max_restarts: 10) diff --git a/lib/wanderer_app/map/map_reconciler.ex b/lib/wanderer_app/map/map_reconciler.ex new file mode 100644 index 00000000..d108e412 --- /dev/null +++ b/lib/wanderer_app/map/map_reconciler.ex @@ -0,0 +1,280 @@ +defmodule WandererApp.Map.Reconciler do + @moduledoc """ + Periodically reconciles map state across different stores (Cache, Registry, GenServer state) + to detect and fix inconsistencies that may prevent map servers from restarting. + """ + use GenServer + + require Logger + + @cache :map_pool_cache + @registry :map_pool_registry + @unique_registry :unique_map_pool_registry + @reconciliation_interval :timer.minutes(5) + + def start_link(_opts) do + GenServer.start_link(__MODULE__, [], name: __MODULE__) + end + + @impl true + def init(_opts) do + Logger.info("Starting Map Reconciler") + schedule_reconciliation() + {:ok, %{}} + end + + @impl true + def handle_info(:reconcile, state) do + schedule_reconciliation() + + try do + reconcile_state() + rescue + e -> + Logger.error(""" + [Map Reconciler] reconciliation error: #{Exception.message(e)} + #{Exception.format_stacktrace(__STACKTRACE__)} + """) + end + + {:noreply, state} + end + + @doc """ + Manually trigger a reconciliation (useful for testing or manual cleanup) + """ + def trigger_reconciliation do + GenServer.cast(__MODULE__, :reconcile_now) + end + + @impl true + def handle_cast(:reconcile_now, state) do + try do + reconcile_state() + rescue + e -> + Logger.error(""" + [Map Reconciler] manual reconciliation error: #{Exception.message(e)} + #{Exception.format_stacktrace(__STACKTRACE__)} + """) + end + + {:noreply, state} + end + + defp schedule_reconciliation do + Process.send_after(self(), :reconcile, @reconciliation_interval) + end + + defp reconcile_state do + Logger.debug("[Map Reconciler] Starting state reconciliation") + + # Get started_maps from cache + {:ok, started_maps} = WandererApp.Cache.lookup("started_maps", []) + + # Get all maps from registries + registry_maps = get_all_registry_maps() + + # Detect zombie maps (in started_maps but not in any registry) + zombie_maps = started_maps -- registry_maps + # Detect orphan maps (in registry but not in started_maps) + orphan_maps = registry_maps -- started_maps + + # Detect cache inconsistencies (map_pool_cache pointing to wrong or non-existent pools) + cache_inconsistencies = find_cache_inconsistencies(registry_maps) + + stats = %{ + total_started_maps: length(started_maps), + total_registry_maps: length(registry_maps), + zombie_maps: length(zombie_maps), + orphan_maps: length(orphan_maps), + cache_inconsistencies: length(cache_inconsistencies) + } + + Logger.info("[Map Reconciler] Reconciliation stats: #{inspect(stats)}") + + # Emit telemetry + :telemetry.execute( + [:wanderer_app, :map, :reconciliation], + stats, + %{} + ) + + # Clean up zombie maps + cleanup_zombie_maps(zombie_maps) + + # Fix orphan maps + fix_orphan_maps(orphan_maps) + + # Fix cache inconsistencies + fix_cache_inconsistencies(cache_inconsistencies) + + Logger.debug("[Map Reconciler] State reconciliation completed") + end + + defp get_all_registry_maps do + case Registry.lookup(@registry, WandererApp.Map.MapPool) do + [] -> + [] + + pools -> + pools + |> Enum.flat_map(fn {_pid, uuid} -> + case Registry.lookup( + @unique_registry, + Module.concat(WandererApp.Map.MapPool, uuid) + ) do + [{_pool_pid, map_ids}] -> map_ids + _ -> [] + end + end) + |> Enum.uniq() + end + end + + defp find_cache_inconsistencies(registry_maps) do + registry_maps + |> Enum.filter(fn map_id -> + case Cachex.get(@cache, map_id) do + {:ok, nil} -> + # Map in registry but not in cache + true + + {:ok, pool_uuid} -> + # Check if the pool_uuid actually exists in registry + case Registry.lookup( + @unique_registry, + Module.concat(WandererApp.Map.MapPool, pool_uuid) + ) do + [] -> + # Cache points to non-existent pool + true + + [{_pool_pid, map_ids}] -> + # Check if this map is actually in the pool's map_ids + map_id not in map_ids + + _ -> + false + end + + {:error, _} -> + true + end + end) + end + + defp cleanup_zombie_maps([]), do: :ok + + defp cleanup_zombie_maps(zombie_maps) do + Logger.warning("[Map Reconciler] Found #{length(zombie_maps)} zombie maps: #{inspect(zombie_maps)}") + + Enum.each(zombie_maps, fn map_id -> + Logger.info("[Map Reconciler] Cleaning up zombie map: #{map_id}") + + # Remove from started_maps cache + WandererApp.Cache.insert_or_update( + "started_maps", + [], + fn started_maps -> + started_maps |> Enum.reject(fn started_map_id -> started_map_id == map_id end) + end + ) + + # Clean up any stale map_pool_cache entries + Cachex.del(@cache, map_id) + + # Clean up map-specific caches + WandererApp.Cache.delete("map_#{map_id}:started") + WandererApp.Cache.delete("map_characters-#{map_id}") + WandererApp.Map.CacheRTree.clear_tree("rtree_#{map_id}") + WandererApp.Map.delete_map_state(map_id) + + :telemetry.execute( + [:wanderer_app, :map, :reconciliation, :zombie_cleanup], + %{count: 1}, + %{map_id: map_id} + ) + end) + end + + defp fix_orphan_maps([]), do: :ok + + defp fix_orphan_maps(orphan_maps) do + Logger.warning("[Map Reconciler] Found #{length(orphan_maps)} orphan maps: #{inspect(orphan_maps)}") + + Enum.each(orphan_maps, fn map_id -> + Logger.info("[Map Reconciler] Fixing orphan map: #{map_id}") + + # Add to started_maps cache + WandererApp.Cache.insert_or_update( + "started_maps", + [map_id], + fn existing -> + [map_id | existing] |> Enum.uniq() + end + ) + + :telemetry.execute( + [:wanderer_app, :map, :reconciliation, :orphan_fixed], + %{count: 1}, + %{map_id: map_id} + ) + end) + end + + defp fix_cache_inconsistencies([]), do: :ok + + defp fix_cache_inconsistencies(inconsistent_maps) do + Logger.warning( + "[Map Reconciler] Found #{length(inconsistent_maps)} cache inconsistencies: #{inspect(inconsistent_maps)}" + ) + + Enum.each(inconsistent_maps, fn map_id -> + Logger.info("[Map Reconciler] Fixing cache inconsistency for map: #{map_id}") + + # Find the correct pool for this map + case find_pool_for_map(map_id) do + {:ok, pool_uuid} -> + Logger.info("[Map Reconciler] Updating cache: #{map_id} -> #{pool_uuid}") + Cachex.put(@cache, map_id, pool_uuid) + + :telemetry.execute( + [:wanderer_app, :map, :reconciliation, :cache_fixed], + %{count: 1}, + %{map_id: map_id, pool_uuid: pool_uuid} + ) + + :error -> + Logger.warning("[Map Reconciler] Could not find pool for map #{map_id}, removing from cache") + Cachex.del(@cache, map_id) + end + end) + end + + defp find_pool_for_map(map_id) do + case Registry.lookup(@registry, WandererApp.Map.MapPool) do + [] -> + :error + + pools -> + pools + |> Enum.find_value(:error, fn {_pid, uuid} -> + case Registry.lookup( + @unique_registry, + Module.concat(WandererApp.Map.MapPool, uuid) + ) do + [{_pool_pid, map_ids}] -> + if map_id in map_ids do + {:ok, uuid} + else + nil + end + + _ -> + nil + end + end) + end + end +end diff --git a/lib/wanderer_app/server/server_status_tracker.ex b/lib/wanderer_app/server/server_status_tracker.ex index 180228fc..cf73a263 100644 --- a/lib/wanderer_app/server/server_status_tracker.ex +++ b/lib/wanderer_app/server/server_status_tracker.ex @@ -11,7 +11,9 @@ defmodule WandererApp.Server.ServerStatusTracker do :server_version, :start_time, :vip, - :retries + :retries, + :in_forced_downtime, + :downtime_notified ] @retries_count 3 @@ -21,9 +23,17 @@ defmodule WandererApp.Server.ServerStatusTracker do retries: @retries_count, server_version: "0", start_time: "0", - vip: true + vip: true, + in_forced_downtime: false, + downtime_notified: false } + # EVE Online daily downtime period (UTC/GMT) + @downtime_start_hour 10 + @downtime_start_minute 58 + @downtime_end_hour 11 + @downtime_end_minute 2 + @refresh_interval :timer.minutes(1) @logger Application.compile_env(:wanderer_app, :logger) @@ -57,13 +67,51 @@ defmodule WandererApp.Server.ServerStatusTracker do def handle_info( :refresh_status, %{ - retries: retries + retries: retries, + in_forced_downtime: was_in_downtime } = state ) do Process.send_after(self(), :refresh_status, @refresh_interval) - Task.async(fn -> get_server_status(retries) end) - {:noreply, state} + in_downtime = in_forced_downtime?() + + cond do + # Entering downtime period - broadcast offline status immediately + in_downtime and not was_in_downtime -> + @logger.info("#{__MODULE__} entering forced downtime period (10:58-11:02 GMT)") + + downtime_status = %{ + players: 0, + server_version: "downtime", + start_time: DateTime.utc_now() |> DateTime.to_iso8601(), + vip: true + } + + Phoenix.PubSub.broadcast( + WandererApp.PubSub, + "server_status", + {:server_status, downtime_status} + ) + + {:noreply, + %{state | in_forced_downtime: true, downtime_notified: true} + |> Map.merge(downtime_status)} + + # Currently in downtime - skip API call + in_downtime -> + {:noreply, state} + + # Exiting downtime period - resume normal operations + not in_downtime and was_in_downtime -> + @logger.info("#{__MODULE__} exiting forced downtime period, resuming normal operations") + Task.async(fn -> get_server_status(retries) end) + {:noreply, %{state | in_forced_downtime: false, downtime_notified: false}} + + # Normal operation + true -> + Task.async(fn -> get_server_status(retries) end) + {:noreply, state} + end end @impl true @@ -155,4 +203,19 @@ defmodule WandererApp.Server.ServerStatusTracker do vip: false } end + + # Checks if the current UTC time falls within the forced downtime period (10:58-11:02 GMT). + defp in_forced_downtime? do + now = DateTime.utc_now() + current_hour = now.hour + current_minute = now.minute + + # Convert times to minutes since midnight for easier comparison + current_time_minutes = current_hour * 60 + current_minute + downtime_start_minutes = @downtime_start_hour * 60 + @downtime_start_minute + downtime_end_minutes = @downtime_end_hour * 60 + @downtime_end_minute + + current_time_minutes >= downtime_start_minutes and + current_time_minutes < downtime_end_minutes + end end diff --git a/test/unit/map/map_pool_test.exs b/test/unit/map/map_pool_test.exs new file mode 100644 index 00000000..748fd29d --- /dev/null +++ b/test/unit/map/map_pool_test.exs @@ -0,0 +1,343 @@ +defmodule WandererApp.Map.MapPoolTest do + use ExUnit.Case, async: false + + alias WandererApp.Map.{MapPool, MapPoolDynamicSupervisor, Reconciler} + + @cache :map_pool_cache + @registry :map_pool_registry + @unique_registry :unique_map_pool_registry + + setup do + # Clean up any existing test data + cleanup_test_data() + + # Check if required infrastructure is running + registries_running? = + try do + Registry.keys(@registry, self()) != :error + rescue + _ -> false + end + + reconciler_running? = Process.whereis(Reconciler) != nil + + on_exit(fn -> + cleanup_test_data() + end) + + {:ok, registries_running: registries_running?, reconciler_running: reconciler_running?} + end + + defp cleanup_test_data do + # Clean up test caches + WandererApp.Cache.delete("started_maps") + Cachex.clear(@cache) + end + + describe "garbage collection with synchronous stop" do + @tag :skip + test "garbage collector successfully stops map with synchronous call" do + # This test would require setting up a full map pool with a test map + # Skipping for now as it requires more complex setup with actual map data + :ok + end + + @tag :skip + test "garbage collector handles stop failures gracefully" do + # This test would verify error handling when stop fails + :ok + end + end + + describe "cache lookup with registry fallback" do + test "stop_map handles cache miss by scanning registry", %{registries_running: registries_running?} do + if registries_running? do + # Setup: Create a map_id that's not in cache but will be found in registry scan + map_id = "test_map_#{:rand.uniform(1_000_000)}" + + # Verify cache is empty for this map + assert {:ok, nil} = Cachex.get(@cache, map_id) + + # Call stop_map - should handle gracefully with fallback + assert :ok = MapPoolDynamicSupervisor.stop_map(map_id) + else + # Skip test if registries not running + :ok + end + end + + test "stop_map handles non-existent pool_uuid in registry", %{registries_running: registries_running?} do + if registries_running? do + map_id = "test_map_#{:rand.uniform(1_000_000)}" + fake_uuid = "fake_uuid_#{:rand.uniform(1_000_000)}" + + # Put fake uuid in cache that doesn't exist in registry + Cachex.put(@cache, map_id, fake_uuid) + + # Call stop_map - should handle gracefully with fallback + assert :ok = MapPoolDynamicSupervisor.stop_map(map_id) + else + :ok + end + end + + test "stop_map updates cache when found via registry scan", %{registries_running: registries_running?} do + if registries_running? do + # This test would require a running pool with registered maps + # For now, we verify the fallback logic doesn't crash + map_id = "test_map_#{:rand.uniform(1_000_000)}" + assert :ok = MapPoolDynamicSupervisor.stop_map(map_id) + else + :ok + end + end + end + + describe "state cleanup atomicity" do + @tag :skip + test "rollback occurs when registry update fails" do + # This would require mocking Registry.update_value to fail + # Skipping for now as it requires more complex mocking setup + :ok + end + + @tag :skip + test "rollback occurs when cache delete fails" do + # This would require mocking Cachex.del to fail + :ok + end + + @tag :skip + test "successful cleanup updates all three state stores" do + # This would verify Registry, Cache, and GenServer state are all updated + :ok + end + end + + describe "Reconciler - zombie map detection and cleanup" do + test "reconciler detects zombie maps in started_maps cache", %{reconciler_running: reconciler_running?} do + if reconciler_running? do + # Setup: Add maps to started_maps that aren't in any registry + zombie_map_id = "zombie_map_#{:rand.uniform(1_000_000)}" + + WandererApp.Cache.insert_or_update( + "started_maps", + [zombie_map_id], + fn existing -> [zombie_map_id | existing] |> Enum.uniq() end + ) + + # Get started_maps + {:ok, started_maps} = WandererApp.Cache.lookup("started_maps", []) + assert zombie_map_id in started_maps + + # Trigger reconciliation + send(Reconciler, :reconcile) + # Give it time to process + Process.sleep(200) + + # Verify zombie was cleaned up + {:ok, started_maps_after} = WandererApp.Cache.lookup("started_maps", []) + refute zombie_map_id in started_maps_after + else + :ok + end + end + + test "reconciler cleans up zombie map caches", %{reconciler_running: reconciler_running?} do + if reconciler_running? do + zombie_map_id = "zombie_map_#{:rand.uniform(1_000_000)}" + + # Setup zombie state + WandererApp.Cache.insert_or_update( + "started_maps", + [zombie_map_id], + fn existing -> [zombie_map_id | existing] |> Enum.uniq() end + ) + + WandererApp.Cache.insert("map_#{zombie_map_id}:started", true) + Cachex.put(@cache, zombie_map_id, "fake_uuid") + + # Trigger reconciliation + send(Reconciler, :reconcile) + Process.sleep(200) + + # Verify all caches cleaned + {:ok, started_maps} = WandererApp.Cache.lookup("started_maps", []) + refute zombie_map_id in started_maps + + {:ok, cache_entry} = Cachex.get(@cache, zombie_map_id) + assert cache_entry == nil + else + :ok + end + end + end + + describe "Reconciler - orphan map detection and fix" do + @tag :skip + test "reconciler detects orphan maps in registry" do + # This would require setting up a pool with maps in registry + # but not in started_maps cache + :ok + end + + @tag :skip + test "reconciler adds orphan maps to started_maps cache" do + # This would verify orphan maps get added to the cache + :ok + end + end + + describe "Reconciler - cache inconsistency detection and fix" do + test "reconciler detects map with missing cache entry", %{reconciler_running: reconciler_running?} do + if reconciler_running? do + # This test verifies the reconciler can detect when a map + # is in the registry but has no cache entry + # Since we can't easily set up a full pool, we test the detection logic + + map_id = "test_map_#{:rand.uniform(1_000_000)}" + + # Ensure no cache entry + Cachex.del(@cache, map_id) + + # The reconciler would detect this if the map was in a registry + # For now, we just verify the logic doesn't crash + send(Reconciler, :reconcile) + Process.sleep(200) + + # No assertions needed - just verifying no crashes + end + end + + test "reconciler detects cache pointing to non-existent pool", %{reconciler_running: reconciler_running?} do + if reconciler_running? do + map_id = "test_map_#{:rand.uniform(1_000_000)}" + fake_uuid = "fake_uuid_#{:rand.uniform(1_000_000)}" + + # Put fake uuid in cache + Cachex.put(@cache, map_id, fake_uuid) + + # Trigger reconciliation + send(Reconciler, :reconcile) + Process.sleep(200) + + # Cache entry should be removed since pool doesn't exist + {:ok, cache_entry} = Cachex.get(@cache, map_id) + assert cache_entry == nil + else + :ok + end + end + end + + describe "Reconciler - stats and telemetry" do + test "reconciler emits telemetry events", %{reconciler_running: reconciler_running?} do + if reconciler_running? do + # Setup telemetry handler + test_pid = self() + + :telemetry.attach( + "test-reconciliation", + [:wanderer_app, :map, :reconciliation], + fn _event, measurements, _metadata, _config -> + send(test_pid, {:telemetry, measurements}) + end, + nil + ) + + # Trigger reconciliation + send(Reconciler, :reconcile) + Process.sleep(200) + + # Should receive telemetry event + assert_receive {:telemetry, measurements}, 500 + + assert is_integer(measurements.total_started_maps) + assert is_integer(measurements.total_registry_maps) + assert is_integer(measurements.zombie_maps) + assert is_integer(measurements.orphan_maps) + assert is_integer(measurements.cache_inconsistencies) + + # Cleanup + :telemetry.detach("test-reconciliation") + else + :ok + end + end + end + + describe "Reconciler - manual trigger" do + test "trigger_reconciliation runs reconciliation immediately", %{reconciler_running: reconciler_running?} do + if reconciler_running? do + zombie_map_id = "zombie_map_#{:rand.uniform(1_000_000)}" + + # Setup zombie state + WandererApp.Cache.insert_or_update( + "started_maps", + [zombie_map_id], + fn existing -> [zombie_map_id | existing] |> Enum.uniq() end + ) + + # Verify it exists + {:ok, started_maps_before} = WandererApp.Cache.lookup("started_maps", []) + assert zombie_map_id in started_maps_before + + # Trigger manual reconciliation + Reconciler.trigger_reconciliation() + Process.sleep(200) + + # Verify zombie was cleaned up + {:ok, started_maps_after} = WandererApp.Cache.lookup("started_maps", []) + refute zombie_map_id in started_maps_after + else + :ok + end + end + end + + describe "edge cases and error handling" do + test "stop_map with cache error returns ok", %{registries_running: registries_running?} do + if registries_running? do + map_id = "test_map_#{:rand.uniform(1_000_000)}" + + # Even if cache operations fail, should return :ok + assert :ok = MapPoolDynamicSupervisor.stop_map(map_id) + else + :ok + end + end + + test "reconciler handles empty registries gracefully", %{reconciler_running: reconciler_running?} do + if reconciler_running? do + # Clear everything + cleanup_test_data() + + # Should not crash even with empty data + send(Reconciler, :reconcile) + Process.sleep(200) + + # No assertions - just verifying no crash + assert true + else + :ok + end + end + + test "reconciler handles nil values in caches", %{reconciler_running: reconciler_running?} do + if reconciler_running? do + map_id = "test_map_#{:rand.uniform(1_000_000)}" + + # Explicitly set nil + Cachex.put(@cache, map_id, nil) + + # Should handle gracefully + send(Reconciler, :reconcile) + Process.sleep(200) + + assert true + else + :ok + end + end + end +end