mirror of
https://github.com/wanderer-industries/wanderer
synced 2026-04-30 06:27:01 +00:00
fix(core): fixed maps start/stop logic, added server downtime period support
This commit is contained in:
@@ -8,7 +8,7 @@ defmodule WandererApp.Character.TrackerPool do
|
||||
:tracked_ids,
|
||||
:uuid,
|
||||
:characters,
|
||||
server_online: true
|
||||
server_online: false
|
||||
]
|
||||
|
||||
@name __MODULE__
|
||||
|
||||
@@ -12,7 +12,7 @@ defmodule WandererApp.Character.TransactionsTracker.Impl do
|
||||
total_balance: 0,
|
||||
transactions: [],
|
||||
retries: 5,
|
||||
server_online: true,
|
||||
server_online: false,
|
||||
status: :started
|
||||
]
|
||||
|
||||
@@ -75,7 +75,7 @@ defmodule WandererApp.Character.TransactionsTracker.Impl do
|
||||
|
||||
def handle_event(
|
||||
:update_corp_wallets,
|
||||
%{character: character} = state
|
||||
%{character: character, server_online: true} = state
|
||||
) do
|
||||
Process.send_after(self(), :update_corp_wallets, @update_interval)
|
||||
|
||||
@@ -88,26 +88,26 @@ defmodule WandererApp.Character.TransactionsTracker.Impl do
|
||||
:update_corp_wallets,
|
||||
state
|
||||
) do
|
||||
Process.send_after(self(), :update_corp_wallets, :timer.seconds(15))
|
||||
Process.send_after(self(), :update_corp_wallets, @update_interval)
|
||||
|
||||
state
|
||||
end
|
||||
|
||||
def handle_event(
|
||||
:check_wallets,
|
||||
%{wallets: []} = state
|
||||
%{character: character, wallets: wallets, server_online: true} = state
|
||||
) do
|
||||
Process.send_after(self(), :check_wallets, :timer.seconds(5))
|
||||
Process.send_after(self(), :check_wallets, @update_interval)
|
||||
|
||||
state
|
||||
end
|
||||
|
||||
def handle_event(
|
||||
:check_wallets,
|
||||
%{character: character, wallets: wallets} = state
|
||||
) do
|
||||
check_wallets(wallets, character)
|
||||
|
||||
state
|
||||
end
|
||||
|
||||
def handle_event(
|
||||
:check_wallets,
|
||||
state
|
||||
) do
|
||||
Process.send_after(self(), :check_wallets, @update_interval)
|
||||
|
||||
state
|
||||
|
||||
@@ -64,11 +64,21 @@ defmodule WandererApp.Map.MapPool do
|
||||
def handle_continue({:start, map_ids}, state) do
|
||||
Logger.info("#{@name} started")
|
||||
|
||||
map_ids
|
||||
|> Enum.each(fn map_id ->
|
||||
GenServer.cast(self(), {:start_map, map_id})
|
||||
end)
|
||||
# Start maps synchronously and accumulate state changes
|
||||
new_state =
|
||||
map_ids
|
||||
|> Enum.reduce(state, fn map_id, current_state ->
|
||||
case do_start_map(map_id, current_state) do
|
||||
{:ok, updated_state} ->
|
||||
updated_state
|
||||
|
||||
{:error, reason} ->
|
||||
Logger.error("[Map Pool] Failed to start map #{map_id}: #{reason}")
|
||||
current_state
|
||||
end
|
||||
end)
|
||||
|
||||
# Schedule periodic tasks
|
||||
Process.send_after(self(), :backup_state, @backup_state_timeout)
|
||||
Process.send_after(self(), :cleanup_systems, 15_000)
|
||||
Process.send_after(self(), :cleanup_characters, @characters_cleanup_timeout)
|
||||
@@ -77,28 +87,21 @@ defmodule WandererApp.Map.MapPool do
|
||||
# Start message queue monitoring
|
||||
Process.send_after(self(), :monitor_message_queue, :timer.seconds(30))
|
||||
|
||||
{:noreply, state}
|
||||
{:noreply, new_state}
|
||||
end
|
||||
|
||||
@impl true
|
||||
def handle_cast(:stop, state), do: {:stop, :normal, state}
|
||||
|
||||
@impl true
|
||||
def handle_cast({:start_map, map_id}, %{map_ids: map_ids, uuid: uuid} = state) do
|
||||
if map_id not in map_ids do
|
||||
Registry.update_value(@unique_registry, Module.concat(__MODULE__, uuid), fn r_map_ids ->
|
||||
[map_id | r_map_ids]
|
||||
end)
|
||||
def handle_cast({:start_map, map_id}, state) do
|
||||
case do_start_map(map_id, state) do
|
||||
{:ok, new_state} ->
|
||||
{:noreply, new_state}
|
||||
|
||||
Cachex.put(@cache, map_id, uuid)
|
||||
|
||||
map_id
|
||||
|> WandererApp.Map.get_map_state!()
|
||||
|> Server.Impl.start_map()
|
||||
|
||||
{:noreply, %{state | map_ids: [map_id | map_ids]}}
|
||||
else
|
||||
{:noreply, state}
|
||||
{:error, _reason} ->
|
||||
# Error already logged in do_start_map
|
||||
{:noreply, state}
|
||||
end
|
||||
end
|
||||
|
||||
@@ -119,6 +122,215 @@ defmodule WandererApp.Map.MapPool do
|
||||
{:noreply, %{state | map_ids: map_ids |> Enum.reject(fn id -> id == map_id end)}}
|
||||
end
|
||||
|
||||
@impl true
|
||||
def handle_call(
|
||||
{:stop_map, map_id},
|
||||
_from,
|
||||
state
|
||||
) do
|
||||
case do_stop_map(map_id, state) do
|
||||
{:ok, new_state} ->
|
||||
{:reply, :ok, new_state}
|
||||
|
||||
{:error, reason} ->
|
||||
{:reply, {:error, reason}, state}
|
||||
end
|
||||
end
|
||||
|
||||
defp do_start_map(map_id, %{map_ids: map_ids, uuid: uuid} = state) do
|
||||
if map_id in map_ids do
|
||||
# Map already started
|
||||
{:ok, state}
|
||||
else
|
||||
# Track what operations succeeded for potential rollback
|
||||
completed_operations = []
|
||||
|
||||
try do
|
||||
# Step 1: Update Registry (most critical, do first)
|
||||
registry_result =
|
||||
Registry.update_value(@unique_registry, Module.concat(__MODULE__, uuid), fn r_map_ids ->
|
||||
[map_id | r_map_ids]
|
||||
end)
|
||||
|
||||
completed_operations = [:registry | completed_operations]
|
||||
|
||||
case registry_result do
|
||||
{new_value, _old_value} when is_list(new_value) ->
|
||||
:ok
|
||||
|
||||
:error ->
|
||||
raise "Failed to update registry for pool #{uuid}"
|
||||
end
|
||||
|
||||
# Step 2: Add to cache
|
||||
case Cachex.put(@cache, map_id, uuid) do
|
||||
{:ok, _} ->
|
||||
completed_operations = [:cache | completed_operations]
|
||||
|
||||
{:error, reason} ->
|
||||
raise "Failed to add to cache: #{inspect(reason)}"
|
||||
end
|
||||
|
||||
# Step 3: Start the map server
|
||||
map_id
|
||||
|> WandererApp.Map.get_map_state!()
|
||||
|> Server.Impl.start_map()
|
||||
|
||||
completed_operations = [:map_server | completed_operations]
|
||||
|
||||
# Step 4: Update GenServer state (last, as this is in-memory and fast)
|
||||
new_state = %{state | map_ids: [map_id | map_ids]}
|
||||
|
||||
Logger.debug("[Map Pool] Successfully started map #{map_id} in pool #{uuid}")
|
||||
{:ok, new_state}
|
||||
rescue
|
||||
e ->
|
||||
Logger.error("""
|
||||
[Map Pool] Failed to start map #{map_id} (completed: #{inspect(completed_operations)}): #{Exception.message(e)}
|
||||
#{Exception.format_stacktrace(__STACKTRACE__)}
|
||||
""")
|
||||
|
||||
# Attempt rollback of completed operations
|
||||
rollback_start_map_operations(map_id, uuid, completed_operations)
|
||||
|
||||
{:error, Exception.message(e)}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
defp rollback_start_map_operations(map_id, uuid, completed_operations) do
|
||||
Logger.warning("[Map Pool] Attempting to rollback start_map operations for #{map_id}")
|
||||
|
||||
# Rollback in reverse order
|
||||
if :map_server in completed_operations do
|
||||
Logger.debug("[Map Pool] Rollback: Stopping map server for #{map_id}")
|
||||
|
||||
try do
|
||||
Server.Impl.stop_map(map_id)
|
||||
rescue
|
||||
e ->
|
||||
Logger.error("[Map Pool] Rollback failed to stop map server: #{Exception.message(e)}")
|
||||
end
|
||||
end
|
||||
|
||||
if :cache in completed_operations do
|
||||
Logger.debug("[Map Pool] Rollback: Removing #{map_id} from cache")
|
||||
|
||||
case Cachex.del(@cache, map_id) do
|
||||
{:ok, _} ->
|
||||
:ok
|
||||
|
||||
{:error, reason} ->
|
||||
Logger.error("[Map Pool] Rollback failed for cache: #{inspect(reason)}")
|
||||
end
|
||||
end
|
||||
|
||||
if :registry in completed_operations do
|
||||
Logger.debug("[Map Pool] Rollback: Removing #{map_id} from registry")
|
||||
|
||||
try do
|
||||
Registry.update_value(@unique_registry, Module.concat(__MODULE__, uuid), fn r_map_ids ->
|
||||
r_map_ids |> Enum.reject(fn id -> id == map_id end)
|
||||
end)
|
||||
rescue
|
||||
e ->
|
||||
Logger.error("[Map Pool] Rollback failed for registry: #{Exception.message(e)}")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
defp do_stop_map(map_id, %{map_ids: map_ids, uuid: uuid} = state) do
|
||||
# Track what operations succeeded for potential rollback
|
||||
completed_operations = []
|
||||
|
||||
try do
|
||||
# Step 1: Update Registry (most critical, do first)
|
||||
registry_result =
|
||||
Registry.update_value(@unique_registry, Module.concat(__MODULE__, uuid), fn r_map_ids ->
|
||||
r_map_ids |> Enum.reject(fn id -> id == map_id end)
|
||||
end)
|
||||
|
||||
completed_operations = [:registry | completed_operations]
|
||||
|
||||
case registry_result do
|
||||
{new_value, _old_value} when is_list(new_value) ->
|
||||
:ok
|
||||
|
||||
:error ->
|
||||
raise "Failed to update registry for pool #{uuid}"
|
||||
end
|
||||
|
||||
# Step 2: Delete from cache
|
||||
case Cachex.del(@cache, map_id) do
|
||||
{:ok, _} ->
|
||||
completed_operations = [:cache | completed_operations]
|
||||
|
||||
{:error, reason} ->
|
||||
raise "Failed to delete from cache: #{inspect(reason)}"
|
||||
end
|
||||
|
||||
# Step 3: Stop the map server (clean up all map resources)
|
||||
map_id
|
||||
|> Server.Impl.stop_map()
|
||||
|
||||
completed_operations = [:map_server | completed_operations]
|
||||
|
||||
# Step 4: Update GenServer state (last, as this is in-memory and fast)
|
||||
new_state = %{state | map_ids: map_ids |> Enum.reject(fn id -> id == map_id end)}
|
||||
|
||||
Logger.debug("[Map Pool] Successfully stopped map #{map_id} from pool #{uuid}")
|
||||
{:ok, new_state}
|
||||
rescue
|
||||
e ->
|
||||
Logger.error("""
|
||||
[Map Pool] Failed to stop map #{map_id} (completed: #{inspect(completed_operations)}): #{Exception.message(e)}
|
||||
#{Exception.format_stacktrace(__STACKTRACE__)}
|
||||
""")
|
||||
|
||||
# Attempt rollback of completed operations
|
||||
rollback_stop_map_operations(map_id, uuid, completed_operations)
|
||||
|
||||
{:error, Exception.message(e)}
|
||||
end
|
||||
end
|
||||
|
||||
defp rollback_stop_map_operations(map_id, uuid, completed_operations) do
|
||||
Logger.warning("[Map Pool] Attempting to rollback stop_map operations for #{map_id}")
|
||||
|
||||
# Rollback in reverse order
|
||||
if :cache in completed_operations do
|
||||
Logger.debug("[Map Pool] Rollback: Re-adding #{map_id} to cache")
|
||||
|
||||
case Cachex.put(@cache, map_id, uuid) do
|
||||
{:ok, _} ->
|
||||
:ok
|
||||
|
||||
{:error, reason} ->
|
||||
Logger.error("[Map Pool] Rollback failed for cache: #{inspect(reason)}")
|
||||
end
|
||||
end
|
||||
|
||||
if :registry in completed_operations do
|
||||
Logger.debug("[Map Pool] Rollback: Re-adding #{map_id} to registry")
|
||||
|
||||
try do
|
||||
Registry.update_value(@unique_registry, Module.concat(__MODULE__, uuid), fn r_map_ids ->
|
||||
if map_id in r_map_ids do
|
||||
r_map_ids
|
||||
else
|
||||
[map_id | r_map_ids]
|
||||
end
|
||||
end)
|
||||
rescue
|
||||
e ->
|
||||
Logger.error("[Map Pool] Rollback failed for registry: #{Exception.message(e)}")
|
||||
end
|
||||
end
|
||||
|
||||
# Note: We don't rollback map_server stop as Server.Impl.stop_map() is idempotent
|
||||
# and the cleanup operations are safe to leave in a "stopped" state
|
||||
end
|
||||
|
||||
@impl true
|
||||
def handle_call(:error, _, state), do: {:stop, :error, :ok, state}
|
||||
|
||||
@@ -231,25 +443,38 @@ defmodule WandererApp.Map.MapPool do
|
||||
Process.send_after(self(), :garbage_collect, @garbage_collection_interval)
|
||||
|
||||
try do
|
||||
map_ids
|
||||
|> Enum.each(fn map_id ->
|
||||
# presence_character_ids =
|
||||
# WandererApp.Cache.lookup!("map_#{map_id}:presence_character_ids", [])
|
||||
# Process each map and accumulate state changes
|
||||
new_state =
|
||||
map_ids
|
||||
|> Enum.reduce(state, fn map_id, current_state ->
|
||||
presence_character_ids =
|
||||
WandererApp.Cache.lookup!("map_#{map_id}:presence_character_ids", [])
|
||||
|
||||
# if presence_character_ids |> Enum.empty?() do
|
||||
Logger.info(
|
||||
"#{uuid}: No more characters present on: #{map_id}, shutting down map server..."
|
||||
)
|
||||
if presence_character_ids |> Enum.empty?() do
|
||||
Logger.info(
|
||||
"#{uuid}: No more characters present on: #{map_id}, shutting down map server..."
|
||||
)
|
||||
|
||||
GenServer.cast(self(), {:stop_map, map_id})
|
||||
# end
|
||||
end)
|
||||
case do_stop_map(map_id, current_state) do
|
||||
{:ok, updated_state} ->
|
||||
Logger.debug("#{uuid}: Successfully stopped map #{map_id}")
|
||||
updated_state
|
||||
|
||||
{:error, reason} ->
|
||||
Logger.error("#{uuid}: Failed to stop map #{map_id}: #{reason}")
|
||||
current_state
|
||||
end
|
||||
else
|
||||
current_state
|
||||
end
|
||||
end)
|
||||
|
||||
{:noreply, new_state}
|
||||
rescue
|
||||
e ->
|
||||
Logger.error(Exception.message(e))
|
||||
Logger.error("#{uuid}: Garbage collection error: #{Exception.message(e)}")
|
||||
{:noreply, state}
|
||||
end
|
||||
|
||||
{:noreply, state}
|
||||
end
|
||||
|
||||
@impl true
|
||||
|
||||
@@ -36,17 +36,78 @@ defmodule WandererApp.Map.MapPoolDynamicSupervisor do
|
||||
end
|
||||
|
||||
def stop_map(map_id) do
|
||||
{:ok, pool_uuid} = Cachex.get(@cache, map_id)
|
||||
case Cachex.get(@cache, map_id) do
|
||||
{:ok, nil} ->
|
||||
# Cache miss - try to find the pool by scanning the registry
|
||||
Logger.warning(
|
||||
"Cache miss for map #{map_id}, scanning registry for pool containing this map"
|
||||
)
|
||||
|
||||
case Registry.lookup(
|
||||
@unique_registry,
|
||||
Module.concat(WandererApp.Map.MapPool, pool_uuid)
|
||||
) do
|
||||
find_pool_by_scanning_registry(map_id)
|
||||
|
||||
{:ok, pool_uuid} ->
|
||||
# Cache hit - use the pool_uuid to lookup the pool
|
||||
case Registry.lookup(
|
||||
@unique_registry,
|
||||
Module.concat(WandererApp.Map.MapPool, pool_uuid)
|
||||
) do
|
||||
[] ->
|
||||
Logger.warning(
|
||||
"Pool with UUID #{pool_uuid} not found in registry for map #{map_id}, scanning registry"
|
||||
)
|
||||
|
||||
find_pool_by_scanning_registry(map_id)
|
||||
|
||||
[{pool_pid, _}] ->
|
||||
GenServer.cast(pool_pid, {:stop_map, map_id})
|
||||
end
|
||||
|
||||
{:error, reason} ->
|
||||
Logger.error("Failed to lookup map #{map_id} in cache: #{inspect(reason)}")
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
defp find_pool_by_scanning_registry(map_id) do
|
||||
case Registry.lookup(@registry, WandererApp.Map.MapPool) do
|
||||
[] ->
|
||||
Logger.debug("No map pools found in registry for map #{map_id}")
|
||||
:ok
|
||||
|
||||
[{pool_pid, _}] ->
|
||||
GenServer.cast(pool_pid, {:stop_map, map_id})
|
||||
pools ->
|
||||
# Scan all pools to find the one containing this map_id
|
||||
found_pool =
|
||||
Enum.find_value(pools, fn {_pid, uuid} ->
|
||||
case Registry.lookup(
|
||||
@unique_registry,
|
||||
Module.concat(WandererApp.Map.MapPool, uuid)
|
||||
) do
|
||||
[{pool_pid, map_ids}] ->
|
||||
if map_id in map_ids do
|
||||
{pool_pid, uuid}
|
||||
else
|
||||
nil
|
||||
end
|
||||
|
||||
_ ->
|
||||
nil
|
||||
end
|
||||
end)
|
||||
|
||||
case found_pool do
|
||||
{pool_pid, pool_uuid} ->
|
||||
Logger.info(
|
||||
"Found map #{map_id} in pool #{pool_uuid} via registry scan, updating cache"
|
||||
)
|
||||
|
||||
# Update the cache to fix the inconsistency
|
||||
Cachex.put(@cache, map_id, pool_uuid)
|
||||
GenServer.cast(pool_pid, {:stop_map, map_id})
|
||||
|
||||
nil ->
|
||||
Logger.debug("Map #{map_id} not found in any pool registry")
|
||||
:ok
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -14,7 +14,8 @@ defmodule WandererApp.Map.MapPoolSupervisor do
|
||||
children = [
|
||||
{Registry, [keys: :unique, name: @unique_registry]},
|
||||
{Registry, [keys: :duplicate, name: @registry]},
|
||||
{WandererApp.Map.MapPoolDynamicSupervisor, []}
|
||||
{WandererApp.Map.MapPoolDynamicSupervisor, []},
|
||||
{WandererApp.Map.Reconciler, []}
|
||||
]
|
||||
|
||||
Supervisor.init(children, strategy: :rest_for_one, max_restarts: 10)
|
||||
|
||||
@@ -0,0 +1,280 @@
|
||||
defmodule WandererApp.Map.Reconciler do
|
||||
@moduledoc """
|
||||
Periodically reconciles map state across different stores (Cache, Registry, GenServer state)
|
||||
to detect and fix inconsistencies that may prevent map servers from restarting.
|
||||
"""
|
||||
use GenServer
|
||||
|
||||
require Logger
|
||||
|
||||
@cache :map_pool_cache
|
||||
@registry :map_pool_registry
|
||||
@unique_registry :unique_map_pool_registry
|
||||
@reconciliation_interval :timer.minutes(5)
|
||||
|
||||
def start_link(_opts) do
|
||||
GenServer.start_link(__MODULE__, [], name: __MODULE__)
|
||||
end
|
||||
|
||||
@impl true
|
||||
def init(_opts) do
|
||||
Logger.info("Starting Map Reconciler")
|
||||
schedule_reconciliation()
|
||||
{:ok, %{}}
|
||||
end
|
||||
|
||||
@impl true
|
||||
def handle_info(:reconcile, state) do
|
||||
schedule_reconciliation()
|
||||
|
||||
try do
|
||||
reconcile_state()
|
||||
rescue
|
||||
e ->
|
||||
Logger.error("""
|
||||
[Map Reconciler] reconciliation error: #{Exception.message(e)}
|
||||
#{Exception.format_stacktrace(__STACKTRACE__)}
|
||||
""")
|
||||
end
|
||||
|
||||
{:noreply, state}
|
||||
end
|
||||
|
||||
@doc """
|
||||
Manually trigger a reconciliation (useful for testing or manual cleanup)
|
||||
"""
|
||||
def trigger_reconciliation do
|
||||
GenServer.cast(__MODULE__, :reconcile_now)
|
||||
end
|
||||
|
||||
@impl true
|
||||
def handle_cast(:reconcile_now, state) do
|
||||
try do
|
||||
reconcile_state()
|
||||
rescue
|
||||
e ->
|
||||
Logger.error("""
|
||||
[Map Reconciler] manual reconciliation error: #{Exception.message(e)}
|
||||
#{Exception.format_stacktrace(__STACKTRACE__)}
|
||||
""")
|
||||
end
|
||||
|
||||
{:noreply, state}
|
||||
end
|
||||
|
||||
defp schedule_reconciliation do
|
||||
Process.send_after(self(), :reconcile, @reconciliation_interval)
|
||||
end
|
||||
|
||||
defp reconcile_state do
|
||||
Logger.debug("[Map Reconciler] Starting state reconciliation")
|
||||
|
||||
# Get started_maps from cache
|
||||
{:ok, started_maps} = WandererApp.Cache.lookup("started_maps", [])
|
||||
|
||||
# Get all maps from registries
|
||||
registry_maps = get_all_registry_maps()
|
||||
|
||||
# Detect zombie maps (in started_maps but not in any registry)
|
||||
zombie_maps = started_maps -- registry_maps
|
||||
# Detect orphan maps (in registry but not in started_maps)
|
||||
orphan_maps = registry_maps -- started_maps
|
||||
|
||||
# Detect cache inconsistencies (map_pool_cache pointing to wrong or non-existent pools)
|
||||
cache_inconsistencies = find_cache_inconsistencies(registry_maps)
|
||||
|
||||
stats = %{
|
||||
total_started_maps: length(started_maps),
|
||||
total_registry_maps: length(registry_maps),
|
||||
zombie_maps: length(zombie_maps),
|
||||
orphan_maps: length(orphan_maps),
|
||||
cache_inconsistencies: length(cache_inconsistencies)
|
||||
}
|
||||
|
||||
Logger.info("[Map Reconciler] Reconciliation stats: #{inspect(stats)}")
|
||||
|
||||
# Emit telemetry
|
||||
:telemetry.execute(
|
||||
[:wanderer_app, :map, :reconciliation],
|
||||
stats,
|
||||
%{}
|
||||
)
|
||||
|
||||
# Clean up zombie maps
|
||||
cleanup_zombie_maps(zombie_maps)
|
||||
|
||||
# Fix orphan maps
|
||||
fix_orphan_maps(orphan_maps)
|
||||
|
||||
# Fix cache inconsistencies
|
||||
fix_cache_inconsistencies(cache_inconsistencies)
|
||||
|
||||
Logger.debug("[Map Reconciler] State reconciliation completed")
|
||||
end
|
||||
|
||||
defp get_all_registry_maps do
|
||||
case Registry.lookup(@registry, WandererApp.Map.MapPool) do
|
||||
[] ->
|
||||
[]
|
||||
|
||||
pools ->
|
||||
pools
|
||||
|> Enum.flat_map(fn {_pid, uuid} ->
|
||||
case Registry.lookup(
|
||||
@unique_registry,
|
||||
Module.concat(WandererApp.Map.MapPool, uuid)
|
||||
) do
|
||||
[{_pool_pid, map_ids}] -> map_ids
|
||||
_ -> []
|
||||
end
|
||||
end)
|
||||
|> Enum.uniq()
|
||||
end
|
||||
end
|
||||
|
||||
defp find_cache_inconsistencies(registry_maps) do
|
||||
registry_maps
|
||||
|> Enum.filter(fn map_id ->
|
||||
case Cachex.get(@cache, map_id) do
|
||||
{:ok, nil} ->
|
||||
# Map in registry but not in cache
|
||||
true
|
||||
|
||||
{:ok, pool_uuid} ->
|
||||
# Check if the pool_uuid actually exists in registry
|
||||
case Registry.lookup(
|
||||
@unique_registry,
|
||||
Module.concat(WandererApp.Map.MapPool, pool_uuid)
|
||||
) do
|
||||
[] ->
|
||||
# Cache points to non-existent pool
|
||||
true
|
||||
|
||||
[{_pool_pid, map_ids}] ->
|
||||
# Check if this map is actually in the pool's map_ids
|
||||
map_id not in map_ids
|
||||
|
||||
_ ->
|
||||
false
|
||||
end
|
||||
|
||||
{:error, _} ->
|
||||
true
|
||||
end
|
||||
end)
|
||||
end
|
||||
|
||||
defp cleanup_zombie_maps([]), do: :ok
|
||||
|
||||
defp cleanup_zombie_maps(zombie_maps) do
|
||||
Logger.warning("[Map Reconciler] Found #{length(zombie_maps)} zombie maps: #{inspect(zombie_maps)}")
|
||||
|
||||
Enum.each(zombie_maps, fn map_id ->
|
||||
Logger.info("[Map Reconciler] Cleaning up zombie map: #{map_id}")
|
||||
|
||||
# Remove from started_maps cache
|
||||
WandererApp.Cache.insert_or_update(
|
||||
"started_maps",
|
||||
[],
|
||||
fn started_maps ->
|
||||
started_maps |> Enum.reject(fn started_map_id -> started_map_id == map_id end)
|
||||
end
|
||||
)
|
||||
|
||||
# Clean up any stale map_pool_cache entries
|
||||
Cachex.del(@cache, map_id)
|
||||
|
||||
# Clean up map-specific caches
|
||||
WandererApp.Cache.delete("map_#{map_id}:started")
|
||||
WandererApp.Cache.delete("map_characters-#{map_id}")
|
||||
WandererApp.Map.CacheRTree.clear_tree("rtree_#{map_id}")
|
||||
WandererApp.Map.delete_map_state(map_id)
|
||||
|
||||
:telemetry.execute(
|
||||
[:wanderer_app, :map, :reconciliation, :zombie_cleanup],
|
||||
%{count: 1},
|
||||
%{map_id: map_id}
|
||||
)
|
||||
end)
|
||||
end
|
||||
|
||||
defp fix_orphan_maps([]), do: :ok
|
||||
|
||||
defp fix_orphan_maps(orphan_maps) do
|
||||
Logger.warning("[Map Reconciler] Found #{length(orphan_maps)} orphan maps: #{inspect(orphan_maps)}")
|
||||
|
||||
Enum.each(orphan_maps, fn map_id ->
|
||||
Logger.info("[Map Reconciler] Fixing orphan map: #{map_id}")
|
||||
|
||||
# Add to started_maps cache
|
||||
WandererApp.Cache.insert_or_update(
|
||||
"started_maps",
|
||||
[map_id],
|
||||
fn existing ->
|
||||
[map_id | existing] |> Enum.uniq()
|
||||
end
|
||||
)
|
||||
|
||||
:telemetry.execute(
|
||||
[:wanderer_app, :map, :reconciliation, :orphan_fixed],
|
||||
%{count: 1},
|
||||
%{map_id: map_id}
|
||||
)
|
||||
end)
|
||||
end
|
||||
|
||||
defp fix_cache_inconsistencies([]), do: :ok
|
||||
|
||||
defp fix_cache_inconsistencies(inconsistent_maps) do
|
||||
Logger.warning(
|
||||
"[Map Reconciler] Found #{length(inconsistent_maps)} cache inconsistencies: #{inspect(inconsistent_maps)}"
|
||||
)
|
||||
|
||||
Enum.each(inconsistent_maps, fn map_id ->
|
||||
Logger.info("[Map Reconciler] Fixing cache inconsistency for map: #{map_id}")
|
||||
|
||||
# Find the correct pool for this map
|
||||
case find_pool_for_map(map_id) do
|
||||
{:ok, pool_uuid} ->
|
||||
Logger.info("[Map Reconciler] Updating cache: #{map_id} -> #{pool_uuid}")
|
||||
Cachex.put(@cache, map_id, pool_uuid)
|
||||
|
||||
:telemetry.execute(
|
||||
[:wanderer_app, :map, :reconciliation, :cache_fixed],
|
||||
%{count: 1},
|
||||
%{map_id: map_id, pool_uuid: pool_uuid}
|
||||
)
|
||||
|
||||
:error ->
|
||||
Logger.warning("[Map Reconciler] Could not find pool for map #{map_id}, removing from cache")
|
||||
Cachex.del(@cache, map_id)
|
||||
end
|
||||
end)
|
||||
end
|
||||
|
||||
defp find_pool_for_map(map_id) do
|
||||
case Registry.lookup(@registry, WandererApp.Map.MapPool) do
|
||||
[] ->
|
||||
:error
|
||||
|
||||
pools ->
|
||||
pools
|
||||
|> Enum.find_value(:error, fn {_pid, uuid} ->
|
||||
case Registry.lookup(
|
||||
@unique_registry,
|
||||
Module.concat(WandererApp.Map.MapPool, uuid)
|
||||
) do
|
||||
[{_pool_pid, map_ids}] ->
|
||||
if map_id in map_ids do
|
||||
{:ok, uuid}
|
||||
else
|
||||
nil
|
||||
end
|
||||
|
||||
_ ->
|
||||
nil
|
||||
end
|
||||
end)
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -11,7 +11,9 @@ defmodule WandererApp.Server.ServerStatusTracker do
|
||||
:server_version,
|
||||
:start_time,
|
||||
:vip,
|
||||
:retries
|
||||
:retries,
|
||||
:in_forced_downtime,
|
||||
:downtime_notified
|
||||
]
|
||||
|
||||
@retries_count 3
|
||||
@@ -21,9 +23,17 @@ defmodule WandererApp.Server.ServerStatusTracker do
|
||||
retries: @retries_count,
|
||||
server_version: "0",
|
||||
start_time: "0",
|
||||
vip: true
|
||||
vip: true,
|
||||
in_forced_downtime: false,
|
||||
downtime_notified: false
|
||||
}
|
||||
|
||||
# EVE Online daily downtime period (UTC/GMT)
|
||||
@downtime_start_hour 10
|
||||
@downtime_start_minute 58
|
||||
@downtime_end_hour 11
|
||||
@downtime_end_minute 2
|
||||
|
||||
@refresh_interval :timer.minutes(1)
|
||||
|
||||
@logger Application.compile_env(:wanderer_app, :logger)
|
||||
@@ -57,13 +67,51 @@ defmodule WandererApp.Server.ServerStatusTracker do
|
||||
def handle_info(
|
||||
:refresh_status,
|
||||
%{
|
||||
retries: retries
|
||||
retries: retries,
|
||||
in_forced_downtime: was_in_downtime
|
||||
} = state
|
||||
) do
|
||||
Process.send_after(self(), :refresh_status, @refresh_interval)
|
||||
Task.async(fn -> get_server_status(retries) end)
|
||||
|
||||
{:noreply, state}
|
||||
in_downtime = in_forced_downtime?()
|
||||
|
||||
cond do
|
||||
# Entering downtime period - broadcast offline status immediately
|
||||
in_downtime and not was_in_downtime ->
|
||||
@logger.info("#{__MODULE__} entering forced downtime period (10:58-11:02 GMT)")
|
||||
|
||||
downtime_status = %{
|
||||
players: 0,
|
||||
server_version: "downtime",
|
||||
start_time: DateTime.utc_now() |> DateTime.to_iso8601(),
|
||||
vip: true
|
||||
}
|
||||
|
||||
Phoenix.PubSub.broadcast(
|
||||
WandererApp.PubSub,
|
||||
"server_status",
|
||||
{:server_status, downtime_status}
|
||||
)
|
||||
|
||||
{:noreply,
|
||||
%{state | in_forced_downtime: true, downtime_notified: true}
|
||||
|> Map.merge(downtime_status)}
|
||||
|
||||
# Currently in downtime - skip API call
|
||||
in_downtime ->
|
||||
{:noreply, state}
|
||||
|
||||
# Exiting downtime period - resume normal operations
|
||||
not in_downtime and was_in_downtime ->
|
||||
@logger.info("#{__MODULE__} exiting forced downtime period, resuming normal operations")
|
||||
Task.async(fn -> get_server_status(retries) end)
|
||||
{:noreply, %{state | in_forced_downtime: false, downtime_notified: false}}
|
||||
|
||||
# Normal operation
|
||||
true ->
|
||||
Task.async(fn -> get_server_status(retries) end)
|
||||
{:noreply, state}
|
||||
end
|
||||
end
|
||||
|
||||
@impl true
|
||||
@@ -155,4 +203,19 @@ defmodule WandererApp.Server.ServerStatusTracker do
|
||||
vip: false
|
||||
}
|
||||
end
|
||||
|
||||
# Checks if the current UTC time falls within the forced downtime period (10:58-11:02 GMT).
|
||||
defp in_forced_downtime? do
|
||||
now = DateTime.utc_now()
|
||||
current_hour = now.hour
|
||||
current_minute = now.minute
|
||||
|
||||
# Convert times to minutes since midnight for easier comparison
|
||||
current_time_minutes = current_hour * 60 + current_minute
|
||||
downtime_start_minutes = @downtime_start_hour * 60 + @downtime_start_minute
|
||||
downtime_end_minutes = @downtime_end_hour * 60 + @downtime_end_minute
|
||||
|
||||
current_time_minutes >= downtime_start_minutes and
|
||||
current_time_minutes < downtime_end_minutes
|
||||
end
|
||||
end
|
||||
|
||||
@@ -0,0 +1,343 @@
|
||||
defmodule WandererApp.Map.MapPoolTest do
|
||||
use ExUnit.Case, async: false
|
||||
|
||||
alias WandererApp.Map.{MapPool, MapPoolDynamicSupervisor, Reconciler}
|
||||
|
||||
@cache :map_pool_cache
|
||||
@registry :map_pool_registry
|
||||
@unique_registry :unique_map_pool_registry
|
||||
|
||||
setup do
|
||||
# Clean up any existing test data
|
||||
cleanup_test_data()
|
||||
|
||||
# Check if required infrastructure is running
|
||||
registries_running? =
|
||||
try do
|
||||
Registry.keys(@registry, self()) != :error
|
||||
rescue
|
||||
_ -> false
|
||||
end
|
||||
|
||||
reconciler_running? = Process.whereis(Reconciler) != nil
|
||||
|
||||
on_exit(fn ->
|
||||
cleanup_test_data()
|
||||
end)
|
||||
|
||||
{:ok, registries_running: registries_running?, reconciler_running: reconciler_running?}
|
||||
end
|
||||
|
||||
defp cleanup_test_data do
|
||||
# Clean up test caches
|
||||
WandererApp.Cache.delete("started_maps")
|
||||
Cachex.clear(@cache)
|
||||
end
|
||||
|
||||
describe "garbage collection with synchronous stop" do
|
||||
@tag :skip
|
||||
test "garbage collector successfully stops map with synchronous call" do
|
||||
# This test would require setting up a full map pool with a test map
|
||||
# Skipping for now as it requires more complex setup with actual map data
|
||||
:ok
|
||||
end
|
||||
|
||||
@tag :skip
|
||||
test "garbage collector handles stop failures gracefully" do
|
||||
# This test would verify error handling when stop fails
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
describe "cache lookup with registry fallback" do
|
||||
test "stop_map handles cache miss by scanning registry", %{registries_running: registries_running?} do
|
||||
if registries_running? do
|
||||
# Setup: Create a map_id that's not in cache but will be found in registry scan
|
||||
map_id = "test_map_#{:rand.uniform(1_000_000)}"
|
||||
|
||||
# Verify cache is empty for this map
|
||||
assert {:ok, nil} = Cachex.get(@cache, map_id)
|
||||
|
||||
# Call stop_map - should handle gracefully with fallback
|
||||
assert :ok = MapPoolDynamicSupervisor.stop_map(map_id)
|
||||
else
|
||||
# Skip test if registries not running
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
test "stop_map handles non-existent pool_uuid in registry", %{registries_running: registries_running?} do
|
||||
if registries_running? do
|
||||
map_id = "test_map_#{:rand.uniform(1_000_000)}"
|
||||
fake_uuid = "fake_uuid_#{:rand.uniform(1_000_000)}"
|
||||
|
||||
# Put fake uuid in cache that doesn't exist in registry
|
||||
Cachex.put(@cache, map_id, fake_uuid)
|
||||
|
||||
# Call stop_map - should handle gracefully with fallback
|
||||
assert :ok = MapPoolDynamicSupervisor.stop_map(map_id)
|
||||
else
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
test "stop_map updates cache when found via registry scan", %{registries_running: registries_running?} do
|
||||
if registries_running? do
|
||||
# This test would require a running pool with registered maps
|
||||
# For now, we verify the fallback logic doesn't crash
|
||||
map_id = "test_map_#{:rand.uniform(1_000_000)}"
|
||||
assert :ok = MapPoolDynamicSupervisor.stop_map(map_id)
|
||||
else
|
||||
:ok
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "state cleanup atomicity" do
|
||||
@tag :skip
|
||||
test "rollback occurs when registry update fails" do
|
||||
# This would require mocking Registry.update_value to fail
|
||||
# Skipping for now as it requires more complex mocking setup
|
||||
:ok
|
||||
end
|
||||
|
||||
@tag :skip
|
||||
test "rollback occurs when cache delete fails" do
|
||||
# This would require mocking Cachex.del to fail
|
||||
:ok
|
||||
end
|
||||
|
||||
@tag :skip
|
||||
test "successful cleanup updates all three state stores" do
|
||||
# This would verify Registry, Cache, and GenServer state are all updated
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
describe "Reconciler - zombie map detection and cleanup" do
|
||||
test "reconciler detects zombie maps in started_maps cache", %{reconciler_running: reconciler_running?} do
|
||||
if reconciler_running? do
|
||||
# Setup: Add maps to started_maps that aren't in any registry
|
||||
zombie_map_id = "zombie_map_#{:rand.uniform(1_000_000)}"
|
||||
|
||||
WandererApp.Cache.insert_or_update(
|
||||
"started_maps",
|
||||
[zombie_map_id],
|
||||
fn existing -> [zombie_map_id | existing] |> Enum.uniq() end
|
||||
)
|
||||
|
||||
# Get started_maps
|
||||
{:ok, started_maps} = WandererApp.Cache.lookup("started_maps", [])
|
||||
assert zombie_map_id in started_maps
|
||||
|
||||
# Trigger reconciliation
|
||||
send(Reconciler, :reconcile)
|
||||
# Give it time to process
|
||||
Process.sleep(200)
|
||||
|
||||
# Verify zombie was cleaned up
|
||||
{:ok, started_maps_after} = WandererApp.Cache.lookup("started_maps", [])
|
||||
refute zombie_map_id in started_maps_after
|
||||
else
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
test "reconciler cleans up zombie map caches", %{reconciler_running: reconciler_running?} do
|
||||
if reconciler_running? do
|
||||
zombie_map_id = "zombie_map_#{:rand.uniform(1_000_000)}"
|
||||
|
||||
# Setup zombie state
|
||||
WandererApp.Cache.insert_or_update(
|
||||
"started_maps",
|
||||
[zombie_map_id],
|
||||
fn existing -> [zombie_map_id | existing] |> Enum.uniq() end
|
||||
)
|
||||
|
||||
WandererApp.Cache.insert("map_#{zombie_map_id}:started", true)
|
||||
Cachex.put(@cache, zombie_map_id, "fake_uuid")
|
||||
|
||||
# Trigger reconciliation
|
||||
send(Reconciler, :reconcile)
|
||||
Process.sleep(200)
|
||||
|
||||
# Verify all caches cleaned
|
||||
{:ok, started_maps} = WandererApp.Cache.lookup("started_maps", [])
|
||||
refute zombie_map_id in started_maps
|
||||
|
||||
{:ok, cache_entry} = Cachex.get(@cache, zombie_map_id)
|
||||
assert cache_entry == nil
|
||||
else
|
||||
:ok
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "Reconciler - orphan map detection and fix" do
|
||||
@tag :skip
|
||||
test "reconciler detects orphan maps in registry" do
|
||||
# This would require setting up a pool with maps in registry
|
||||
# but not in started_maps cache
|
||||
:ok
|
||||
end
|
||||
|
||||
@tag :skip
|
||||
test "reconciler adds orphan maps to started_maps cache" do
|
||||
# This would verify orphan maps get added to the cache
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
describe "Reconciler - cache inconsistency detection and fix" do
|
||||
test "reconciler detects map with missing cache entry", %{reconciler_running: reconciler_running?} do
|
||||
if reconciler_running? do
|
||||
# This test verifies the reconciler can detect when a map
|
||||
# is in the registry but has no cache entry
|
||||
# Since we can't easily set up a full pool, we test the detection logic
|
||||
|
||||
map_id = "test_map_#{:rand.uniform(1_000_000)}"
|
||||
|
||||
# Ensure no cache entry
|
||||
Cachex.del(@cache, map_id)
|
||||
|
||||
# The reconciler would detect this if the map was in a registry
|
||||
# For now, we just verify the logic doesn't crash
|
||||
send(Reconciler, :reconcile)
|
||||
Process.sleep(200)
|
||||
|
||||
# No assertions needed - just verifying no crashes
|
||||
end
|
||||
end
|
||||
|
||||
test "reconciler detects cache pointing to non-existent pool", %{reconciler_running: reconciler_running?} do
|
||||
if reconciler_running? do
|
||||
map_id = "test_map_#{:rand.uniform(1_000_000)}"
|
||||
fake_uuid = "fake_uuid_#{:rand.uniform(1_000_000)}"
|
||||
|
||||
# Put fake uuid in cache
|
||||
Cachex.put(@cache, map_id, fake_uuid)
|
||||
|
||||
# Trigger reconciliation
|
||||
send(Reconciler, :reconcile)
|
||||
Process.sleep(200)
|
||||
|
||||
# Cache entry should be removed since pool doesn't exist
|
||||
{:ok, cache_entry} = Cachex.get(@cache, map_id)
|
||||
assert cache_entry == nil
|
||||
else
|
||||
:ok
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "Reconciler - stats and telemetry" do
|
||||
test "reconciler emits telemetry events", %{reconciler_running: reconciler_running?} do
|
||||
if reconciler_running? do
|
||||
# Setup telemetry handler
|
||||
test_pid = self()
|
||||
|
||||
:telemetry.attach(
|
||||
"test-reconciliation",
|
||||
[:wanderer_app, :map, :reconciliation],
|
||||
fn _event, measurements, _metadata, _config ->
|
||||
send(test_pid, {:telemetry, measurements})
|
||||
end,
|
||||
nil
|
||||
)
|
||||
|
||||
# Trigger reconciliation
|
||||
send(Reconciler, :reconcile)
|
||||
Process.sleep(200)
|
||||
|
||||
# Should receive telemetry event
|
||||
assert_receive {:telemetry, measurements}, 500
|
||||
|
||||
assert is_integer(measurements.total_started_maps)
|
||||
assert is_integer(measurements.total_registry_maps)
|
||||
assert is_integer(measurements.zombie_maps)
|
||||
assert is_integer(measurements.orphan_maps)
|
||||
assert is_integer(measurements.cache_inconsistencies)
|
||||
|
||||
# Cleanup
|
||||
:telemetry.detach("test-reconciliation")
|
||||
else
|
||||
:ok
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "Reconciler - manual trigger" do
|
||||
test "trigger_reconciliation runs reconciliation immediately", %{reconciler_running: reconciler_running?} do
|
||||
if reconciler_running? do
|
||||
zombie_map_id = "zombie_map_#{:rand.uniform(1_000_000)}"
|
||||
|
||||
# Setup zombie state
|
||||
WandererApp.Cache.insert_or_update(
|
||||
"started_maps",
|
||||
[zombie_map_id],
|
||||
fn existing -> [zombie_map_id | existing] |> Enum.uniq() end
|
||||
)
|
||||
|
||||
# Verify it exists
|
||||
{:ok, started_maps_before} = WandererApp.Cache.lookup("started_maps", [])
|
||||
assert zombie_map_id in started_maps_before
|
||||
|
||||
# Trigger manual reconciliation
|
||||
Reconciler.trigger_reconciliation()
|
||||
Process.sleep(200)
|
||||
|
||||
# Verify zombie was cleaned up
|
||||
{:ok, started_maps_after} = WandererApp.Cache.lookup("started_maps", [])
|
||||
refute zombie_map_id in started_maps_after
|
||||
else
|
||||
:ok
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "edge cases and error handling" do
|
||||
test "stop_map with cache error returns ok", %{registries_running: registries_running?} do
|
||||
if registries_running? do
|
||||
map_id = "test_map_#{:rand.uniform(1_000_000)}"
|
||||
|
||||
# Even if cache operations fail, should return :ok
|
||||
assert :ok = MapPoolDynamicSupervisor.stop_map(map_id)
|
||||
else
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
test "reconciler handles empty registries gracefully", %{reconciler_running: reconciler_running?} do
|
||||
if reconciler_running? do
|
||||
# Clear everything
|
||||
cleanup_test_data()
|
||||
|
||||
# Should not crash even with empty data
|
||||
send(Reconciler, :reconcile)
|
||||
Process.sleep(200)
|
||||
|
||||
# No assertions - just verifying no crash
|
||||
assert true
|
||||
else
|
||||
:ok
|
||||
end
|
||||
end
|
||||
|
||||
test "reconciler handles nil values in caches", %{reconciler_running: reconciler_running?} do
|
||||
if reconciler_running? do
|
||||
map_id = "test_map_#{:rand.uniform(1_000_000)}"
|
||||
|
||||
# Explicitly set nil
|
||||
Cachex.put(@cache, map_id, nil)
|
||||
|
||||
# Should handle gracefully
|
||||
send(Reconciler, :reconcile)
|
||||
Process.sleep(200)
|
||||
|
||||
assert true
|
||||
else
|
||||
:ok
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
Reference in New Issue
Block a user