// Metal Device detection and handling for Apple Silicon // Specifically optimized for M-series chips using proper system detection const std = @import("std"); const Allocator = std.mem.Allocator; const c = std.c; // Device information structure pub const MetalDeviceInfo = struct { device_name: []const u8, is_apple_silicon: bool, is_m_series: bool, series_generation: u8, // 1 = M1, 2 = M2, 3 = M3, etc. variant: []const u8, // "Pro", "Max", "Ultra", etc. unified_memory_size: u64, has_anc: bool, // Apple Neural Engine pub fn format( self: @This(), comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype, ) !void { _ = fmt; _ = options; try writer.print("Metal Device: {s} ({s}{d} {s})", .{ self.device_name, if (self.is_m_series) "M" else "", if (self.is_m_series) self.series_generation else 0, if (self.is_m_series) self.variant else "", }); try writer.print("\nUnified Memory: {} GB", .{self.unified_memory_size / (1024 * 1024 * 1024)}); try writer.print("\nApple Neural Engine: {}", .{if (self.has_anc) "Available" else "Not Available"}); } }; // M-series chip information const MSeriesInfo = struct { is_m_series: bool, generation: u8, variant: []const u8, }; // System detection using sysctl const SysctlError = error{ NotFound, BufferTooSmall, SystemError, }; /// Get sysctl string value fn getSysctlString(allocator: Allocator, name: []const u8) ![]const u8 { // Only available on macOS if (@import("builtin").os.tag != .macos) { return SysctlError.NotFound; } var size: usize = 0; // First, get the size needed const name_cstr = try allocator.dupeZ(u8, name); defer allocator.free(name_cstr); if (c.sysctlbyname(name_cstr.ptr, null, &size, null, 0) != 0) { return SysctlError.NotFound; } // Allocate buffer and get the actual value const buffer = try allocator.alloc(u8, size); defer allocator.free(buffer); if (c.sysctlbyname(name_cstr.ptr, buffer.ptr, &size, null, 0) != 0) { return SysctlError.SystemError; } // Return a copy of the string (minus null terminator if present) const len = if (size > 0 and buffer[size - 1] == 0) size - 1 else size; return try allocator.dupe(u8, buffer[0..len]); } /// Get sysctl integer value fn getSysctlInt(comptime T: type, name: []const u8, allocator: Allocator) !T { if (@import("builtin").os.tag != .macos) { return SysctlError.NotFound; } var value: T = 0; var size: usize = @sizeOf(T); const name_cstr = try allocator.dupeZ(u8, name); defer allocator.free(name_cstr); if (c.sysctlbyname(name_cstr.ptr, &value, &size, null, 0) != 0) { return SysctlError.NotFound; } return value; } /// Check if running under Rosetta 2 translation fn isRunningUnderRosetta(allocator: Allocator) bool { const result = getSysctlInt(i32, "sysctl.proc_translated", allocator) catch return false; return result == 1; } /// Check if hardware supports ARM64 (Apple Silicon) fn isAppleSiliconHardware(allocator: Allocator) bool { // Check for ARM64 support const arm64_support = getSysctlInt(i32, "hw.optional.arm64", allocator) catch return false; if (arm64_support == 1) return true; // Alternative check: CPU architecture if (@import("builtin").target.cpu.arch == .aarch64) return true; // If running under Rosetta, we're on Apple Silicon return isRunningUnderRosetta(allocator); } /// Parse M-series information from CPU brand string fn parseMSeriesInfo(cpu_brand: []const u8) MSeriesInfo { // Default values var result = MSeriesInfo{ .is_m_series = false, .generation = 0, .variant = "", }; // Look for Apple M pattern if (std.mem.indexOf(u8, cpu_brand, "Apple M") == null) { return result; } result.is_m_series = true; // Extract generation and variant from CPU brand string // Examples: "Apple M1", "Apple M1 Pro", "Apple M1 Max", "Apple M1 Ultra" if (std.mem.indexOf(u8, cpu_brand, "M1")) |_| { result.generation = 1; if (std.mem.indexOf(u8, cpu_brand, " Pro")) |_| { result.variant = "Pro"; } else if (std.mem.indexOf(u8, cpu_brand, " Max")) |_| { result.variant = "Max"; } else if (std.mem.indexOf(u8, cpu_brand, " Ultra")) |_| { result.variant = "Ultra"; } else { // Just "Apple M1" - this is the regular M1 result.variant = ""; } } else if (std.mem.indexOf(u8, cpu_brand, "M2")) |_| { result.generation = 2; if (std.mem.indexOf(u8, cpu_brand, " Pro")) |_| { result.variant = "Pro"; } else if (std.mem.indexOf(u8, cpu_brand, " Max")) |_| { result.variant = "Max"; } else if (std.mem.indexOf(u8, cpu_brand, " Ultra")) |_| { result.variant = "Ultra"; } else { result.variant = ""; } } else if (std.mem.indexOf(u8, cpu_brand, "M3")) |_| { result.generation = 3; if (std.mem.indexOf(u8, cpu_brand, " Pro")) |_| { result.variant = "Pro"; } else if (std.mem.indexOf(u8, cpu_brand, " Max")) |_| { result.variant = "Max"; } else if (std.mem.indexOf(u8, cpu_brand, " Ultra")) |_| { result.variant = "Ultra"; } else { result.variant = ""; } } else if (std.mem.indexOf(u8, cpu_brand, "M4")) |_| { result.generation = 4; if (std.mem.indexOf(u8, cpu_brand, " Pro")) |_| { result.variant = "Pro"; } else if (std.mem.indexOf(u8, cpu_brand, " Max")) |_| { result.variant = "Max"; } else if (std.mem.indexOf(u8, cpu_brand, " Ultra")) |_| { result.variant = "Ultra"; } else { result.variant = ""; } } return result; } /// Try to detect GPU configuration for more detailed chip identification fn detectGPUCores(allocator: Allocator) u32 { // Try to get GPU core count - this can help distinguish variants // Regular M1: 7-8 GPU cores // M1 Pro: 14-16 GPU cores // M1 Max: 24-32 GPU cores // This is a placeholder - actual implementation would query Metal API // For now, return 0 to indicate unknown _ = allocator; return 0; } /// Detect Apple Silicon and M-series chip capabilities using proper system detection pub fn detectAppleSilicon(allocator: Allocator) !MetalDeviceInfo { // Check at compile-time if we're on macOS const is_macos = @import("builtin").os.tag == .macos; if (!is_macos) { return MetalDeviceInfo{ .device_name = try allocator.dupe(u8, "Non-macOS Device"), .is_apple_silicon = false, .is_m_series = false, .series_generation = 0, .variant = try allocator.dupe(u8, ""), .unified_memory_size = 0, .has_anc = false, }; } // Detect Apple Silicon hardware const is_apple_silicon = isAppleSiliconHardware(allocator); if (!is_apple_silicon) { return MetalDeviceInfo{ .device_name = try allocator.dupe(u8, "Intel Mac"), .is_apple_silicon = false, .is_m_series = false, .series_generation = 0, .variant = try allocator.dupe(u8, ""), .unified_memory_size = 0, .has_anc = false, }; } // Get CPU brand string for M-series detection - this is the authoritative source const cpu_brand = getSysctlString(allocator, "machdep.cpu.brand_string") catch "Apple Silicon"; defer allocator.free(cpu_brand); std.log.debug("CPU Brand String: '{s}'", .{cpu_brand}); // Parse M-series information from the actual CPU brand string const m_info = parseMSeriesInfo(cpu_brand); // Get additional hardware details for logging/debugging const hw_model = getSysctlString(allocator, "hw.model") catch ""; defer if (hw_model.len > 0) allocator.free(hw_model); const gpu_cores = detectGPUCores(allocator); if (gpu_cores > 0) { std.log.debug("GPU Cores: {}", .{gpu_cores}); } std.log.debug("Hardware Model: '{s}'", .{hw_model}); std.log.debug("Detected M{d} {s}", .{ m_info.generation, m_info.variant }); // Get system memory const memory_size = getSysctlInt(u64, "hw.memsize", allocator) catch (16 * 1024 * 1024 * 1024); // Default 16GB // Get device name const device_name = getSysctlString(allocator, "hw.model") catch "Apple Silicon Mac"; return MetalDeviceInfo{ .device_name = device_name, // This will be owned by the caller .is_apple_silicon = true, .is_m_series = m_info.is_m_series, .series_generation = m_info.generation, .variant = try allocator.dupe(u8, m_info.variant), // Duplicate to ensure consistent allocation .unified_memory_size = memory_size, .has_anc = m_info.is_m_series, // All M-series have Apple Neural Engine }; } /// Get optimal GPU parameters for detected device pub fn getOptimalWorkGroupSize() u32 { // These are reasonable defaults that should work well on most Apple GPU architectures // In a real implementation, we would query Metal API for the actual optimal values if (@import("builtin").target.cpu.arch == .aarch64) { // Apple Silicon optimized values based on GPU core count return 128; } // Default for Intel Macs and others return 64; } /// Get recommended memory allocation strategy based on device capabilities pub fn getMemoryStrategy() enum { UnifiedMemory, DiscreteMemory } { // Check if we're on Apple Silicon hardware (even under Rosetta) if (@import("builtin").os.tag == .macos) { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; defer _ = gpa.deinit(); const allocator = gpa.allocator(); if (isAppleSiliconHardware(allocator)) { return .UnifiedMemory; // Apple Silicon uses unified memory } } // For Intel Macs and other platforms return .DiscreteMemory; } /// Get optimal tensor block size for current device pub fn getOptimalTensorBlockSize() u32 { if (@import("builtin").target.cpu.arch == .aarch64) { // Apple Silicon has more GPU cores and benefits from larger blocks return 256; } else { return 128; } }