Improve CSV file encoding support for imports

2025-06-04 10:22:12 +10:00 · 2025-06-04 10:22:12 +10:00 · daf4391a30
parent f4533421f1
commit daf4391a30
8 changed files with 1375 additions and 55 deletions
--- a/app/Http/Controllers/BaseController.php
+++ b/app/Http/Controllers/BaseController.php
@ -88,9 +88,9 @@ class BaseController extends Controller

    /* Grouped permissions when we want to hide columns for particular permission groups*/

-    private array $client_exclusion_fields = ['balance', 'paid_to_date', 'credit_balance', 'client_hash'];
-    private array $client_excludable_permissions = ['view_client'];
-    private array $client_excludable_overrides = ['edit_client', 'edit_all', 'view_invoice', 'view_all', 'edit_invoice'];
+    protected array $client_exclusion_fields = ['balance', 'paid_to_date', 'credit_balance', 'client_hash'];
+    protected array $client_excludable_permissions = ['view_client'];
+    protected array $client_excludable_overrides = ['edit_client', 'edit_all', 'view_invoice', 'view_all', 'edit_invoice'];

    /* Grouped permissions when we want to hide columns for particular permission groups*/

--- a/app/Http/Controllers/ClientController.php
+++ b/app/Http/Controllers/ClientController.php
@ -112,6 +112,12 @@ class ClientController extends BaseController
     */
    public function show(ShowClientRequest $request, Client $client)
    {
+        nlog("show");
+
+        // Hide the columns listed in $client_exclusion_fields when
+        // hasExcludedPermissions() returns true for the configured
+        // permission / override lists.
+        $hideRestrictedFields = auth()->user()->hasExcludedPermissions(
+            $this->client_excludable_permissions,
+            $this->client_excludable_overrides
+        );
+
+        if ($hideRestrictedFields) {
+            nlog('hiding fields');
+            $client->makeHidden($this->client_exclusion_fields);
+        }
+
        return $this->itemResponse($client);
    }

@ -125,6 +131,12 @@ class ClientController extends BaseController
     */
    public function edit(EditClientRequest $request, Client $client)
    {
+        nlog("Edit");
+
+        // Hide the columns listed in $client_exclusion_fields when
+        // hasExcludedPermissions() returns true for the configured
+        // permission / override lists.
+        $hideRestrictedFields = auth()->user()->hasExcludedPermissions(
+            $this->client_excludable_permissions,
+            $this->client_excludable_overrides
+        );
+
+        if ($hideRestrictedFields) {
+            nlog('hiding fields');
+            $client->makeHidden($this->client_exclusion_fields);
+        }
+
        return $this->itemResponse($client);
    }

--- a/app/Http/Controllers/ImportController.php
+++ b/app/Http/Controllers/ImportController.php
@ -83,8 +83,8 @@ class ImportController extends Controller
        ];
        /** @var UploadedFile $file */
        foreach ($request->files->get('files') as $entityType => $file) {
-            $contents = file_get_contents($file->getPathname());
-            // Store the csv in cache with an expiry of 10 minutes
+            $contents = $this->readFileWithProperEncoding($file->getPathname());
+
            Cache::put($hash.'-'.$entityType, base64_encode($contents), 1200);

            // Parse CSV
@ -104,6 +104,224 @@ class ImportController extends Controller
        return response()->json($data);
    }

+    /**
+     * Read an import file and return its contents as UTF-8.
+     *
+     * Strategies are tried in this order and the first valid result wins:
+     *  1. UTF-16 / UTF-32 (BOM or NUL-byte heuristic) is converted directly.
+     *  2. A leading BOM is stripped; content that is already clean UTF-8 is
+     *     returned untouched.
+     *  3. The bytes are interpreted as Windows-1252.
+     *  4. U+FFFD replacement characters already present in the data are repaired.
+     *  5. The bytes are interpreted as ISO-8859-1, then ISO-8859-15.
+     *
+     * The file is read exactly once. PHP's file:// wrapper has no "encoding"
+     * context option, so re-reading the file through a stream context yields
+     * the same bytes and is not attempted.
+     *
+     * @param string $filePath Path of the file to read.
+     * @return string UTF-8 contents; '' when the file cannot be read; the
+     *                BOM-stripped raw bytes when no strategy produced a valid result.
+     */
+    private function readFileWithProperEncoding(string $filePath): string
+    {
+        // Best-effort read: an unreadable file is reported to the caller as empty content.
+        $contents = @file_get_contents($filePath);
+        if ($contents === false) {
+            return '';
+        }
+
+        // UTF-16 / UTF-32 input is fully handled by the detector.
+        $bomResult = $this->detectAndHandleUTFEncoding($contents);
+        if ($bomResult !== null) {
+            return $bomResult;
+        }
+
+        $contents = $this->removeBOM($contents);
+
+        // isValidConversion() already includes the UTF-8 well-formedness check.
+        if ($this->isValidConversion($contents)) {
+            return $contents;
+        }
+
+        // Most non-UTF-8 CSV exports are Windows-1252.
+        $converted = mb_convert_encoding($contents, 'UTF-8', 'WINDOWS-1252');
+        if ($this->isValidConversion($converted)) {
+            return $converted;
+        }
+
+        // The file may be UTF-8 that was corrupted upstream (contains U+FFFD).
+        $fixed = $this->fixCorruptedWindows1252($contents);
+        if ($this->isValidConversion($fixed)) {
+            return $fixed;
+        }
+
+        // Remaining single-byte candidates. Windows-1252 (and its CP1252 alias)
+        // was tried above and would produce the same rejected result.
+        foreach (['ISO-8859-1', 'ISO-8859-15'] as $encoding) {
+            $converted = mb_convert_encoding($contents, 'UTF-8', $encoding);
+            if ($this->isValidConversion($converted)) {
+                return $converted;
+            }
+        }
+
+        // Fallback: hand back the BOM-stripped original bytes.
+        return $contents;
+    }
+
+    /**
+     * Detect UTF-16 / UTF-32 input and convert it to UTF-8.
+     *
+     * A byte order mark is honoured when present. Without one, up to the
+     * first 100 bytes are sampled for the NUL-byte layout that ASCII-range
+     * text produces in each encoding and byte order.
+     *
+     * @param string $data Raw file bytes.
+     * @return string|null UTF-8 text, or null when the data does not look
+     *                     like UTF-16 or UTF-32.
+     */
+    private function detectAndHandleUTFEncoding(string $data): ?string
+    {
+        // 4-byte marks come first: the UTF-32LE BOM starts with the UTF-16LE BOM.
+        $boms = [
+            "\x00\x00\xFE\xFF" => 'UTF-32BE',
+            "\xFF\xFE\x00\x00" => 'UTF-32LE',
+            "\xFE\xFF" => 'UTF-16BE',
+            "\xFF\xFE" => 'UTF-16LE',
+        ];
+
+        foreach ($boms as $bom => $encoding) {
+            if (str_starts_with($data, $bom)) {
+                return mb_convert_encoding(substr($data, strlen($bom)), 'UTF-8', $encoding);
+            }
+        }
+
+        // No BOM: fall back to a heuristic on the leading bytes.
+        $length = strlen($data);
+        $sampleEnd = min(100, $length);
+
+        // UTF-32: an ASCII-range code point is 00 00 00 XX (big endian) or
+        // XX 00 00 00 (little endian). Each layout is decoded with its own
+        // byte order.
+        if ($length >= 8 && $length % 4 === 0) {
+            $bigEndianUnits = 0;
+            $littleEndianUnits = 0;
+
+            for ($i = 0; $i < $sampleEnd; $i += 4) {
+                $unit = substr($data, $i, 4);
+
+                if (str_starts_with($unit, "\x00\x00\x00")) {
+                    $bigEndianUnits++;
+                } elseif (str_ends_with($unit, "\x00\x00\x00")) {
+                    $littleEndianUnits++;
+                }
+            }
+
+            if ($bigEndianUnits > 5) {
+                return mb_convert_encoding($data, 'UTF-8', 'UTF-32BE');
+            }
+
+            if ($littleEndianUnits > 5) {
+                return mb_convert_encoding($data, 'UTF-8', 'UTF-32LE');
+            }
+        }
+
+        // UTF-16: an ASCII-range code unit is XX 00 (little endian) or 00 XX (big endian).
+        if ($length >= 4 && $length % 2 === 0) {
+            $highByteNulls = 0; // NUL in the second byte of a pair -> little endian
+            $lowByteNulls = 0;  // NUL in the first byte of a pair  -> big endian
+
+            for ($i = 0; $i < $sampleEnd; $i += 2) {
+                if ($data[$i + 1] === "\x00") {
+                    $highByteNulls++;
+                }
+                if ($data[$i] === "\x00") {
+                    $lowByteNulls++;
+                }
+            }
+
+            if ($highByteNulls > 10) { // Likely UTF-16LE
+                return mb_convert_encoding($data, 'UTF-8', 'UTF-16LE');
+            }
+
+            if ($lowByteNulls > 10) { // Likely UTF-16BE
+                return mb_convert_encoding($data, 'UTF-8', 'UTF-16BE');
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Remove a BOM (Byte Order Mark) from the beginning of a string.
+     *
+     * Recognises the UTF-8, UTF-16 (BE/LE) and UTF-32 (BE/LE) marks. At most
+     * one mark is removed.
+     *
+     * @param string $data Raw bytes, possibly starting with a BOM.
+     * @return string The input without its leading BOM, or unchanged if none is present.
+     */
+    private function removeBOM(string $data): string
+    {
+        // The 4-byte marks must be tested before the 2-byte ones: the UTF-32LE
+        // BOM (FF FE 00 00) begins with the UTF-16LE BOM (FF FE), so matching
+        // the shorter mark first would leave two stray NUL bytes behind.
+        $boms = [
+            "\x00\x00\xFE\xFF", // UTF-32 BE
+            "\xFF\xFE\x00\x00", // UTF-32 LE
+            "\xEF\xBB\xBF",     // UTF-8
+            "\xFE\xFF",         // UTF-16 BE
+            "\xFF\xFE",         // UTF-16 LE
+        ];
+
+        foreach ($boms as $bom) {
+            if (str_starts_with($data, $bom)) {
+                return substr($data, strlen($bom));
+            }
+        }
+
+        return $data;
+    }
+
+    /**
+     * Tell whether the data contains any byte from the 0x80-0x9F range that
+     * Windows-1252 assigns to a printable character.
+     *
+     * 0x81, 0x8D, 0x8F, 0x90 and 0x9D are not in the list and never match.
+     *
+     * @param string $data Raw bytes to scan.
+     * @return bool True when at least one listed byte occurs.
+     */
+    private function containsWindows1252Bytes(string $data): bool
+    {
+        $windows1252Bytes = "\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8E"
+            . "\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9E\x9F";
+
+        // strpbrk() scans byte-wise for the first occurrence of any listed byte.
+        return strpbrk($data, $windows1252Bytes) !== false;
+    }
+
+    /**
+     * Replace every UTF-8 replacement character (U+FFFD) with a right single
+     * quotation mark (U+2019).
+     *
+     * The original byte is lost once U+FFFD has been written, so this is a
+     * guess: the right single quote (Windows-1252 0x92) is treated as the
+     * most common victim.
+     *
+     * @param string $data UTF-8 text that may contain U+FFFD.
+     * @return string The text with each U+FFFD swapped for U+2019.
+     */
+    private function fixCorruptedWindows1252(string $data): string
+    {
+        $replacementCharacter = "\xEF\xBF\xBD";
+        $rightSingleQuote = "\u{2019}";
+
+        return str_replace($replacementCharacter, $rightSingleQuote, $data);
+    }
+
+    /**
+     * Decide whether a string is usable UTF-8 output.
+     *
+     * @param string $data Candidate text.
+     * @return bool True when the text is well-formed UTF-8 and contains
+     *              neither U+FFFD nor its double-encoded form.
+     */
+    private function isValidConversion(string $data): bool
+    {
+        if (!mb_check_encoding($data, 'UTF-8')) {
+            return false;
+        }
+
+        $corruptionMarkers = [
+            "\xEF\xBF\xBD", // UTF-8 replacement character bytes
+            'ï¿½',          // Double-encoded replacement character
+        ];
+
+        foreach ($corruptionMarkers as $marker) {
+            if (str_contains($data, $marker)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
    private function setImportHints($entity_type, $available_keys, $headers): array
    {
        $hints = [];
@ -203,7 +421,9 @@ class ImportController extends Controller

            /** @var UploadedFile $file */
            foreach ($request->files->get('files') as $entityType => $file) {
-                $contents = file_get_contents($file->getPathname());
+                // $contents = file_get_contents($file->getPathname());
+                $contents = $this->readFileWithProperEncoding($file->getPathname());
+
                // Store the csv in cache with an expiry of 10 minutes
                Cache::put($hash.'-'.$entityType, base64_encode($contents), 600);
                nlog($hash.'-'.$entityType);
@ -248,54 +468,9 @@ class ImportController extends Controller
            }
        }

-        return $this->convertData($data);
+        return $data; // Remove the convertData call since we fixed encoding upfront
    }

-
-
-    private function convertData(array $data): array
-    {
-
-        // List of encodings to check against
-        $encodings = [
-            'UTF-8',
-            'ISO-8859-1',  // Latin-1
-            'ISO-8859-2',  // Latin-2
-            'WINDOWS-1252', // CP1252
-            'SHIFT-JIS',
-            'EUC-JP',
-            'GB2312',
-            'GBK',
-            'BIG5',
-            'ISO-2022-JP',
-            'KOI8-R',
-            'KOI8-U',
-            'WINDOWS-1251', // CP1251
-            'UTF-16',
-            'UTF-32',
-            'ASCII',
-            'WINDOWS-1254', // Turkish, which sometimes includes Georgian
-            'WINDOWS-1256', // Arabic, which sometimes includes Georgian
-            'ISO-8859-10',
-        ];
-
-        foreach ($data as $key => $value) {
-            // Only process strings
-            if (is_string($value)) {
-                // Detect the encoding of the string
-                $detectedEncoding = mb_detect_encoding($value, $encodings, true);
-
-                // If encoding is detected and it's not UTF-8, convert it to UTF-8
-                if ($detectedEncoding && $detectedEncoding !== 'UTF-8') {
-                    $array[$key] = mb_convert_encoding($value, 'UTF-8', $detectedEncoding);
-                }
-            }
-        }
-
-        return $data;
-    }
-
-
    /**
     * Returns the best delimiter
     *
--- a/app/Http/Controllers/ImportJsonController.php
+++ b/app/Http/Controllers/ImportJsonController.php
@ -107,7 +107,7 @@ class ImportJsonController extends BaseController
        return response()->json(array_merge(['message' => 'Processing','success' => true], $metadata), 200);
    }

-    private function handleChunkedUpload(ImportJsonRequest $request)
+    private function handleChunkedUploadX(ImportJsonRequest $request)
    {
        $metadata = json_decode($request->metadata, true);
        
@ -251,4 +251,146 @@ class ImportJsonController extends BaseController

        rmdir($dir);
    }
+
+    /**
+     * Store one chunk of a chunked upload and assemble the file once every
+     * chunk is present.
+     *
+     * Chunks are written to "tmp/chunks/{fileHash}/chunk-{n}" on the selected
+     * disk. When the number of stored chunks reaches `totalChunks`, they are
+     * concatenated into "migrations/{fileName}" and the chunk files removed.
+     * Chunk indices are zero-based, matching the 0..totalChunks-1 lookup done
+     * when the chunks are combined.
+     *
+     * @param ImportJsonRequest $request Carries the JSON `metadata` string and the chunk as `file`.
+     * @return array Decoded metadata; gains `uploaded_filepath` once the file is assembled.
+     * @throws \InvalidArgumentException When the metadata or the chunk fails validation.
+     * @throws \Exception Anything raised while combining chunks, rethrown after cleanup.
+     */
+    private function handleChunkedUpload(ImportJsonRequest $request)
+    {
+        $metadata = json_decode($request->metadata, true);
+
+        // Validate metadata structure
+        if (!is_array($metadata) || !isset($metadata['fileHash'], $metadata['fileName'], $metadata['totalChunks'], $metadata['currentChunk'])) {
+            throw new \InvalidArgumentException('Invalid metadata structure');
+        }
+
+        // Sanitize and validate file hash (should be alphanumeric)
+        if (!preg_match('/^[a-zA-Z0-9]+$/', $metadata['fileHash'])) {
+            throw new \InvalidArgumentException('Invalid file hash format');
+        }
+
+        // The filename must be a plain name: no directory part, and not one of
+        // the special entries that basename() returns unchanged.
+        $safeFileName = basename($metadata['fileName']);
+        if ($safeFileName !== $metadata['fileName'] || in_array($safeFileName, ['', '.', '..'], true)) {
+            throw new \InvalidArgumentException('Invalid filename');
+        }
+
+        // Chunk numbers must be true integers. is_numeric() would also accept
+        // "1.5", "1e2" or "01", which produce chunk keys that the integer
+        // lookup during combination can never find.
+        $currentChunk = filter_var($metadata['currentChunk'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 0]]);
+        if ($currentChunk === false) {
+            throw new \InvalidArgumentException('Invalid chunk number');
+        }
+
+        $totalChunks = filter_var($metadata['totalChunks'], FILTER_VALIDATE_INT, ['options' => ['min_range' => 1, 'max_range' => 1000]]);
+        if ($totalChunks === false) {
+            throw new \InvalidArgumentException('Invalid total chunks');
+        }
+
+        // An index outside 0..totalChunks-1 would count towards completion
+        // without ever being part of the combined file.
+        if ($currentChunk >= $totalChunks) {
+            throw new \InvalidArgumentException('Invalid chunk number');
+        }
+
+        // Validate the uploaded chunk itself
+        $chunk = $request->file('file');
+        if (!$chunk || !$chunk->isValid()) {
+            throw new \InvalidArgumentException('Invalid file chunk');
+        }
+
+        // Validate file size before saving
+        $maxChunkSize = 5 * 1024 * 1024; // 5MB
+        if ($chunk->getSize() > $maxChunkSize) {
+            throw new \InvalidArgumentException('Chunk size exceeds limit');
+        }
+
+        $disk = Ninja::isHosted() ? 'backup' : config('filesystems.default');
+
+        // Store the chunk under a path unique to this upload
+        $chunkPrefix = "tmp/chunks/{$metadata['fileHash']}/";
+        $chunkKey = "{$chunkPrefix}chunk-{$currentChunk}";
+
+        Storage::disk($disk)->put(
+            $chunkKey,
+            file_get_contents($chunk->getRealPath()),
+            ['visibility' => 'private']
+        );
+
+        // Check whether all chunks are uploaded by listing the stored objects
+        $uploadedChunks = collect(Storage::disk($disk)->files($chunkPrefix))
+            ->filter(function ($file) {
+                return str_contains(basename($file), 'chunk-');
+            })
+            ->count();
+
+        if ($uploadedChunks >= $totalChunks) {
+            try {
+                // Combine chunks into the final file
+                $finalPath = "migrations/{$safeFileName}";
+                $this->combineChunksFromS3($disk, $metadata['fileHash'], $totalChunks, $finalPath);
+
+                // Clean up
+                $this->cleanupS3Chunks($disk, $metadata['fileHash']);
+
+                $metadata['uploaded_filepath'] = $finalPath;
+                return $metadata;
+
+            } catch (\Exception $e) {
+                // Clean up on error
+                $this->cleanupS3Chunks($disk, $metadata['fileHash']);
+                throw $e;
+            }
+        }
+
+        return $metadata;
+    }
+
+    /**
+     * Concatenate the stored chunks of an upload, in index order, into one
+     * file on the same disk.
+     *
+     * The chunks are first assembled in a local temporary file, which is then
+     * streamed to its final location so the combined file is never held in
+     * memory as a whole.
+     *
+     * @param string $disk        Name of the storage disk holding the chunks.
+     * @param string $fileHash    Upload identifier used in the chunk path.
+     * @param int    $totalChunks Number of chunks; indices 0..$totalChunks-1 are read.
+     * @param string $finalPath   Destination path on the disk.
+     * @throws \RuntimeException When the temporary file cannot be created, a chunk
+     *                           is missing or unreadable, or a write/upload fails.
+     */
+    private function combineChunksFromS3(string $disk, string $fileHash, int $totalChunks, string $finalPath): void
+    {
+        // Create a temporary local file to combine chunks
+        $tempFile = tempnam(sys_get_temp_dir(), 'chunk_combine_');
+        if ($tempFile === false) {
+            throw new \RuntimeException('Failed to create temporary file');
+        }
+
+        $handle = null;
+
+        try {
+            // Read/write mode so the same handle can be rewound and uploaded.
+            $handle = fopen($tempFile, 'w+b');
+            if ($handle === false) {
+                throw new \RuntimeException('Failed to create temporary file');
+            }
+
+            $storage = Storage::disk($disk);
+
+            // Download and combine chunks in order
+            for ($i = 0; $i < $totalChunks; $i++) {
+                $chunkKey = "tmp/chunks/{$fileHash}/chunk-{$i}";
+
+                if (!$storage->exists($chunkKey)) {
+                    throw new \RuntimeException("Missing chunk: {$i}");
+                }
+
+                $chunkContent = $storage->get($chunkKey);
+                if ($chunkContent === null) {
+                    throw new \RuntimeException("Failed to read chunk: {$i}");
+                }
+
+                if (fwrite($handle, $chunkContent) === false) {
+                    throw new \RuntimeException("Failed to write chunk: {$i}");
+                }
+            }
+
+            // Upload the combined file to its final location as a stream.
+            rewind($handle);
+            $stored = $storage->put($finalPath, $handle, ['visibility' => 'private']);
+            if ($stored === false) {
+                throw new \RuntimeException('Failed to store combined file');
+            }
+
+        } finally {
+            // Close the handle on every path; the storage driver may already
+            // have closed it, hence the is_resource() guard.
+            if (is_resource($handle)) {
+                fclose($handle);
+            }
+
+            // Clean up temporary file
+            if (file_exists($tempFile)) {
+                unlink($tempFile);
+            }
+        }
+    }
+
+    /**
+     * Delete every file stored under the chunk directory of an upload.
+     *
+     * @param string $disk     Name of the storage disk holding the chunks.
+     * @param string $fileHash Upload identifier used in the chunk path.
+     */
+    private function cleanupS3Chunks(string $disk, string $fileHash): void
+    {
+        $storage = Storage::disk($disk);
+        $chunkFiles = $storage->files("tmp/chunks/{$fileHash}/");
+
+        // Nothing stored for this upload: no delete call is issued.
+        if (empty($chunkFiles)) {
+            return;
+        }
+
+        $storage->delete($chunkFiles);
+    }
 }
--- a/app/Import/Providers/BaseImport.php
+++ b/app/Import/Providers/BaseImport.php
@ -108,7 +108,7 @@ class BaseImport
        nlog("found {$entity_type}");

        $csv = base64_decode($base64_encoded_csv);
-        $csv = mb_convert_encoding($csv, 'UTF-8', 'UTF-8');
+        // $csv = mb_convert_encoding($csv, 'UTF-8', 'UTF-8');

        $csv = Reader::createFromString($csv);
        $csvdelimiter = self::detectDelimiter($csv);
--- a/app/Jobs/Invoice/ZipInvoices.php
+++ b/app/Jobs/Invoice/ZipInvoices.php
@ -38,7 +38,7 @@ class ZipInvoices implements ShouldQueue

    public $tries = 1;

-    public $timeout = 3600;
+    public $timeout = 10800;

    /**
     * @param $invoices
--- a/tests/Unit/ImportEncodingTest.php
+++ b/tests/Unit/ImportEncodingTest.php
@ -0,0 +1,480 @@
+<?php
+
+namespace Tests\Unit;
+
+use Tests\TestCase;
+use App\Http\Controllers\ImportController;
+use Illuminate\Http\UploadedFile;
+use Illuminate\Support\Facades\Storage;
+use ReflectionClass;
+use ReflectionMethod;
+
+class ImportEncodingTest extends TestCase
+{
+    private ImportController $controller;
+    private ReflectionMethod $readFileMethod;
+    private ReflectionMethod $containsWindows1252Method;
+    private ReflectionMethod $fixCorruptedMethod;
+    private ReflectionMethod $isValidConversionMethod;
+
+    /**
+     * Build the controller under test and expose its private encoding
+     * helpers through reflection.
+     */
+    protected function setUp(): void
+    {
+        parent::setUp();
+
+        $this->controller = new ImportController();
+
+        $controllerClass = new ReflectionClass($this->controller);
+
+        // Looks up a private method by name and makes it invokable.
+        $expose = static function (string $methodName) use ($controllerClass): ReflectionMethod {
+            $method = $controllerClass->getMethod($methodName);
+            $method->setAccessible(true);
+
+            return $method;
+        };
+
+        $this->readFileMethod = $expose('readFileWithProperEncoding');
+        $this->containsWindows1252Method = $expose('containsWindows1252Bytes');
+        $this->fixCorruptedMethod = $expose('fixCorruptedWindows1252');
+        $this->isValidConversionMethod = $expose('isValidConversion');
+    }
+
+    /**
+     * Plain-ASCII fixture strings, keyed by scenario name.
+     *
+     * Every value uses only straight quotes and unaccented letters, so its
+     * bytes are identical in UTF-8, ISO-8859-1 and Windows-1252.
+     *
+     * @return array<string, string>
+     */
+    private function getTestData(): array
+    {
+        return [
+            // ASCII stand-ins for text that typically carries smart quotes, symbols or accents
+            'basic' => "Company's text with quotes",
+            'apostrophes' => "Sya's Ian Le Led",
+            'quotes' => '"Smart quotes" and \'single quotes\'',
+            'currency' => "Price: 50.00, 25.99", // Simplified to avoid currency symbols in basic test
+            'symbols' => "Trademark and copyright symbols",
+            'accents' => "Cafe resume naive facade", // Simplified accents
+        ];
+    }
+
+    /**
+     * Fixture strings containing non-ASCII characters (guillemets, dashes,
+     * ellipsis, currency signs, trademark/copyright signs, accented letters),
+     * keyed by scenario name. Used by the encoding-specific tests.
+     *
+     * @return array<string, string>
+     */
+    private function getComplexTestData(): array
+    {
+        return [
+            'complex' => "Company's «quoted» text—dash…ellipsis",
+            'currency' => "Price: €50.00, £25.99",
+            'symbols' => "Trademark™ and copyright© symbols",
+            'accents' => "Café résumé naïve piñata façade",
+        ];
+    }
+
+    /**
+     * Windows-1252 special characters (0x80-0x9F range).
+     *
+     * Maps each listed Windows-1252 byte value to the character it is
+     * expected to decode to. 0x81, 0x8D, 0x8F, 0x90 and 0x9D are not listed.
+     *
+     * @return array<int, string>
+     */
+    private function getWindows1252SpecialChars(): array
+    {
+        return [
+            0x80 => '€',  // Euro sign
+            0x82 => '‚',  // Single low-9 quotation mark
+            0x83 => 'ƒ',  // Latin small letter f with hook
+            0x84 => '„',  // Double low-9 quotation mark
+            0x85 => '…',  // Horizontal ellipsis
+            0x86 => '†',  // Dagger
+            0x87 => '‡',  // Double dagger
+            0x88 => 'ˆ',  // Modifier letter circumflex accent
+            0x89 => '‰',  // Per mille sign
+            0x8A => 'Š',  // Latin capital letter S with caron
+            0x8B => '‹',  // Single left-pointing angle quotation mark
+            0x8C => 'Œ',  // Latin capital ligature OE
+            0x8E => 'Ž',  // Latin capital letter Z with caron
+            0x91 => "\u{2018}",  // Left single quotation mark (smart quote)
+            0x92 => "\u{2019}",  // Right single quotation mark (smart quote)
+            0x93 => "\u{201C}",  // Left double quotation mark
+            0x94 => "\u{201D}",  // Right double quotation mark
+            0x95 => '•',  // Bullet
+            0x96 => '–',  // En dash
+            0x97 => '—',  // Em dash
+            0x98 => '˜',  // Small tilde
+            0x99 => '™',  // Trade mark sign
+            0x9A => 'š',  // Latin small letter s with caron
+            0x9B => '›',  // Single right-pointing angle quotation mark
+            0x9C => 'œ',  // Latin small ligature oe
+            0x9E => 'ž',  // Latin small letter z with caron
+            0x9F => 'Ÿ',  // Latin capital letter Y with diaeresis
+        ];
+    }
+
+    /**
+     * Write a fixture to a new temporary file in the requested encoding.
+     *
+     * Besides real encoding names, three pseudo-encodings are understood:
+     * 'UTF-8' (written as-is), 'UTF-8-BOM' (UTF-8 BOM prepended) and
+     * 'UTF-8-CORRUPTED' (each straight apostrophe replaced by U+FFFD).
+     *
+     * @param string $content  UTF-8 source text.
+     * @param string $encoding Target encoding or pseudo-encoding.
+     * @return string Path of the temporary file; the caller removes it.
+     */
+    private function createTestFile(string $content, string $encoding): string
+    {
+        $tempFile = tempnam(sys_get_temp_dir(), 'encoding_test_');
+
+        $bytes = match ($encoding) {
+            'UTF-8-BOM' => "\xEF\xBB\xBF" . $content,
+            // Simulate corrupted UTF-8 with replacement characters
+            'UTF-8-CORRUPTED' => str_replace("'", "\xEF\xBF\xBD", $content),
+            'UTF-8' => $content,
+            // Convert to target encoding
+            default => mb_convert_encoding($content, $encoding, 'UTF-8'),
+        };
+
+        file_put_contents($tempFile, $bytes);
+
+        return $tempFile;
+    }
+
+    /**
+     * Test 1: UTF-8 clean files (should pass through unchanged)
+     */
+    public function testCleanUtf8Files()
+    {
+        foreach ($this->getTestData() as $name => $content) {
+            $tempFile = $this->createTestFile($content, 'UTF-8');
+
+            try {
+                $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+
+                $this->assertSame($content, $result, "Clean UTF-8 test failed for: {$name}");
+                $this->assertTrue(
+                    $this->isValidConversionMethod->invoke($this->controller, $result),
+                    "Validation failed for clean UTF-8: {$name}"
+                );
+            } finally {
+                // Remove the fixture even when an assertion above fails.
+                unlink($tempFile);
+            }
+        }
+    }
+
+    /**
+     * Test 2: UTF-8 with BOM (the BOM is stripped, the text is unchanged)
+     */
+    public function testUtf8WithBom()
+    {
+        foreach ($this->getTestData() as $name => $content) {
+            $tempFile = $this->createTestFile($content, 'UTF-8-BOM');
+
+            try {
+                $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+
+                // Should remove BOM and return clean content
+                $this->assertSame($content, $result, "UTF-8 BOM test failed for: {$name}");
+                $this->assertTrue(
+                    $this->isValidConversionMethod->invoke($this->controller, $result),
+                    "Validation failed for UTF-8 BOM: {$name}"
+                );
+            } finally {
+                // Remove the fixture even when an assertion above fails.
+                unlink($tempFile);
+            }
+        }
+    }
+
+    /**
+     * Test 3: Windows-1252 files are converted back to the original UTF-8 text
+     */
+    public function testWindows1252Files()
+    {
+        // Test with complex Unicode characters for Windows-1252
+        foreach ($this->getComplexTestData() as $name => $content) {
+            $tempFile = $this->createTestFile($content, 'WINDOWS-1252');
+
+            try {
+                $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+
+                $this->assertSame($content, $result, "Windows-1252 test failed for: {$name}");
+                $this->assertTrue(
+                    $this->isValidConversionMethod->invoke($this->controller, $result),
+                    "Validation failed for Windows-1252: {$name}"
+                );
+            } finally {
+                // Remove the fixture even when an assertion above fails.
+                unlink($tempFile);
+            }
+        }
+    }
+
+    /**
+     * Test 3.5: Complex UTF-8 files with Unicode characters pass through unchanged
+     */
+    public function testComplexUtf8Files()
+    {
+        foreach ($this->getComplexTestData() as $name => $content) {
+            $tempFile = $this->createTestFile($content, 'UTF-8');
+
+            try {
+                $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+
+                $this->assertSame($content, $result, "Complex UTF-8 test failed for: {$name}");
+                $this->assertTrue(
+                    $this->isValidConversionMethod->invoke($this->controller, $result),
+                    "Validation failed for complex UTF-8: {$name}"
+                );
+            } finally {
+                // Remove the fixture even when an assertion above fails.
+                unlink($tempFile);
+            }
+        }
+    }
+
+    /**
+     * Test 4: ISO-8859-1 files are converted back to the original UTF-8 text
+     */
+    public function testIso88591Files()
+    {
+        // Use only characters that exist in ISO-8859-1
+        $testData = [
+            'basic' => "Company's text",
+            'accents' => "Café résumé naïve façade",
+        ];
+
+        foreach ($testData as $name => $content) {
+            $tempFile = $this->createTestFile($content, 'ISO-8859-1');
+
+            try {
+                $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+
+                $this->assertSame($content, $result, "ISO-8859-1 test failed for: {$name}");
+                $this->assertTrue(
+                    $this->isValidConversionMethod->invoke($this->controller, $result),
+                    "Validation failed for ISO-8859-1: {$name}"
+                );
+            } finally {
+                // Remove the fixture even when an assertion above fails.
+                unlink($tempFile);
+            }
+        }
+    }
+
+    /**
+     * Test 5: Corrupted UTF-8 with replacement characters
+     *
+     * The fixture replaces every straight apostrophe with U+FFFD; the reader
+     * is expected to turn each of them into a right single quotation mark.
+     */
+    public function testCorruptedUtf8Files()
+    {
+        foreach ($this->getTestData() as $name => $content) {
+            $tempFile = $this->createTestFile($content, 'UTF-8-CORRUPTED');
+
+            try {
+                $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+
+                // Expected result should have smart quotes instead of straight apostrophes
+                $expectedContent = str_replace("'", "\u{2019}", $content);
+                $this->assertSame($expectedContent, $result, "Corrupted UTF-8 test failed for: {$name}");
+                $this->assertTrue(
+                    $this->isValidConversionMethod->invoke($this->controller, $result),
+                    "Validation failed for corrupted UTF-8: {$name}"
+                );
+            } finally {
+                // Remove the fixture even when an assertion above fails.
+                unlink($tempFile);
+            }
+        }
+    }
+
+    /**
+     * Test 6: All Windows-1252 special characters
+     *
+     * Each listed byte is written raw into a file and must come back as the
+     * corresponding Unicode character.
+     */
+    public function testAllWindows1252SpecialCharacters()
+    {
+        foreach ($this->getWindows1252SpecialChars() as $byte => $expectedChar) {
+            $tempFile = tempnam(sys_get_temp_dir(), 'char_test_');
+
+            try {
+                // Write raw bytes including the Windows-1252 character
+                file_put_contents($tempFile, "Test " . chr($byte) . " character");
+
+                $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+
+                $this->assertSame(
+                    "Test {$expectedChar} character",
+                    $result,
+                    "Windows-1252 character test failed for byte 0x" . dechex($byte) . " ({$expectedChar})"
+                );
+            } finally {
+                // Remove the fixture even when the assertion above fails.
+                unlink($tempFile);
+            }
+        }
+    }
+
+    /**
+     * Test 7: containsWindows1252Bytes method
+     */
+    public function testContainsWindows1252Bytes()
+    {
+        $detect = fn (string $bytes) => $this->containsWindows1252Method->invoke($this->controller, $bytes);
+
+        // A raw 0x92 byte is a Windows-1252 special character
+        $this->assertTrue(
+            $detect("Test " . chr(0x92) . " content"),
+            "Should detect Windows-1252 bytes"
+        );
+
+        // Pure ASCII has none
+        $this->assertFalse(
+            $detect("Test clean content"),
+            "Should not detect Windows-1252 bytes in clean data"
+        );
+
+        // The bytes of U+FFFD (EF BF BD) are outside the detected set
+        $this->assertFalse(
+            $detect("Test \xEF\xBF\xBD content"),
+            "Should not detect Windows-1252 bytes in corrupted UTF-8"
+        );
+    }
+
+    /**
+     * Test 8: fixCorruptedWindows1252 method
+     *
+     * Expects the UTF-8 replacement sequence (EF BF BD) in the sample string
+     * to come back as a right single quotation mark (U+2019).
+     */
+    public function testFixCorruptedWindows1252()
+    {
+        $input = "Sya\xEF\xBF\xBDs In Le";
+
+        $repaired = $this->fixCorruptedMethod->invoke($this->controller, $input);
+
+        $this->assertEquals("Sya\u{2019}s In Le", $repaired, "Failed to fix corrupted Windows-1252 data");
+    }
+
+    /**
+     * Test 9: isValidConversion method
+     *
+     * One input is expected to be accepted (plain text with an ASCII
+     * apostrophe). Three are expected to be rejected: the UTF-8 replacement
+     * sequence, its double-encoded form, and a stray 0xFF byte.
+     */
+    public function testIsValidConversion()
+    {
+        $isValid = fn (string $data) => $this->isValidConversionMethod->invoke($this->controller, $data);
+
+        // Accepted: clean text, no replacement characters
+        $this->assertTrue(
+            $isValid("Clean UTF-8 content with apostrophe's"),
+            "Should validate clean UTF-8 content"
+        );
+
+        // Rejected: raw replacement character bytes
+        $this->assertFalse(
+            $isValid("Content with \xEF\xBF\xBD replacement"),
+            "Should reject content with UTF-8 replacement bytes"
+        );
+
+        // Rejected: replacement character that was encoded a second time
+        $this->assertFalse(
+            $isValid("Content with ï¿½ replacement"),
+            "Should reject content with double-encoded replacement"
+        );
+
+        // Rejected: 0xFF can never occur in well-formed UTF-8
+        $this->assertFalse(
+            $isValid("Invalid \xFF UTF-8"),
+            "Should reject invalid UTF-8"
+        );
+    }
+
+    /**
+     * Test 10: Multiple encoding types comprehensive test
+     *
+     * Writes a sample string in several encodings and checks that the reader
+     * returns valid UTF-8 containing no U+FFFD replacement character.
+     */
+    public function testMultipleEncodingTypes()
+    {
+        $encodings = [
+            'UTF-8',
+            'WINDOWS-1252',
+            'ISO-8859-1',
+            'ISO-8859-15',
+            'ASCII',
+        ];
+
+        $testContent = "Company's «test» data—with symbols";
+
+        foreach ($encodings as $encoding) {
+            // ASCII can't handle special characters, use simpler content
+            $content = $encoding === 'ASCII' ? "Company data test" : $testContent;
+
+            $tempFile = $this->createTestFile($content, $encoding);
+
+            try {
+                $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+
+                // Result should always be valid UTF-8
+                $this->assertTrue(
+                    mb_check_encoding($result, 'UTF-8'),
+                    "Result should be valid UTF-8 for encoding: {$encoding}"
+                );
+
+                // U+FFFD is spelled as its UTF-8 byte sequence so the check does
+                // not depend on how this source file is stored or rendered.
+                $this->assertFalse(
+                    str_contains($result, "\xEF\xBF\xBD"),
+                    "Result should not contain replacement characters for encoding: {$encoding}"
+                );
+            } finally {
+                // Remove the fixture even when an assertion fails.
+                unlink($tempFile);
+            }
+        }
+    }
+
+    /**
+     * Test 11: Backward compatibility - existing functionality should not break
+     *
+     * A plain UTF-8 CSV with ASCII apostrophes must come back from the reader
+     * exactly as it was written.
+     */
+    public function testBackwardCompatibility()
+    {
+        $rows = [
+            "Name,Amount,Date",
+            "\"John's Company\",100.50,2024-01-01",
+            "\"Mary's Store\",250.75,2024-01-02",
+        ];
+        $csvContent = implode("\n", $rows);
+
+        $tempFile = $this->createTestFile($csvContent, 'UTF-8');
+        $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+
+        // Whole-file round trip
+        $this->assertEquals($csvContent, $result, "Backward compatibility test failed for CSV content");
+
+        // Spot-check the quoted names
+        foreach (["John's Company", "Mary's Store"] as $expectedName) {
+            $this->assertStringContainsString($expectedName, $result, "CSV should contain original apostrophes");
+        }
+
+        unlink($tempFile);
+    }
+
+    /**
+     * Test 12: Edge cases and error handling
+     *
+     * Covers an empty file, a path that does not exist (both expected to give
+     * an empty string) and a large repeated string written through
+     * createTestFile with 'WINDOWS-1252'.
+     */
+    public function testEdgeCases()
+    {
+        // Empty file
+        $emptyFile = tempnam(sys_get_temp_dir(), 'empty_test_');
+        file_put_contents($emptyFile, '');
+
+        $emptyResult = $this->readFileMethod->invoke($this->controller, $emptyFile);
+        $this->assertEquals('', $emptyResult, "Empty file should return empty string");
+
+        unlink($emptyFile);
+
+        // Non-existent file
+        $missingResult = $this->readFileMethod->invoke($this->controller, '/non/existent/file.csv');
+        $this->assertEquals('', $missingResult, "Non-existent file should return empty string");
+
+        // Very large content with mixed characters
+        $largeFile = $this->createTestFile(
+            str_repeat("Test's data with special chars—", 1000),
+            'WINDOWS-1252'
+        );
+
+        $largeResult = $this->readFileMethod->invoke($this->controller, $largeFile);
+        $largeIsValid = $this->isValidConversionMethod->invoke($this->controller, $largeResult);
+        $this->assertTrue($largeIsValid, "Large file conversion should be valid");
+
+        unlink($largeFile);
+    }
+
+    /**
+     * Test 13: Performance test to ensure no significant regression
+     *
+     * Reads a 10,000-line fixture and requires the read to finish in under
+     * one second and to yield a valid conversion.
+     */
+    public function testPerformance()
+    {
+        $content = str_repeat("Company's data with special characters test\n", 10000);
+        $tempFile = $this->createTestFile($content, 'WINDOWS-1252');
+
+        try {
+            // hrtime() is monotonic, so the measurement cannot be skewed by a
+            // wall-clock adjustment while the read is running.
+            $startedAt = hrtime(true);
+            $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+            $processingTime = (hrtime(true) - $startedAt) / 1e9; // nanoseconds -> seconds
+
+            // Should process reasonably fast (less than 1 second for 10k lines)
+            $this->assertLessThan(1.0, $processingTime, "Processing should be reasonably fast");
+            $this->assertTrue(
+                $this->isValidConversionMethod->invoke($this->controller, $result),
+                "Performance test result should be valid"
+            );
+        } finally {
+            // Remove the fixture even when an assertion fails.
+            unlink($tempFile);
+        }
+    }
+} 
--- a/tests/Unit/ImportUnicodeEncodingTest.php
+++ b/tests/Unit/ImportUnicodeEncodingTest.php
@ -0,0 +1,511 @@
+<?php
+
+namespace Tests\Unit;
+
+use Tests\TestCase;
+use App\Http\Controllers\ImportController;
+use ReflectionClass;
+use ReflectionMethod;
+
+class ImportUnicodeEncodingTest extends TestCase
+{
+    private ImportController $controller;
+    private ReflectionMethod $readFileMethod;
+    private ReflectionMethod $isValidConversionMethod;
+    private ReflectionMethod $removeBOMMethod;
+
+    /**
+     * Builds a fresh controller and reflection handles for the non-public
+     * helpers the tests call directly.
+     */
+    protected function setUp(): void
+    {
+        parent::setUp();
+
+        $this->controller = new ImportController();
+
+        // Use reflection to access private methods
+        $controllerClass = new ReflectionClass($this->controller);
+        $expose = static function (string $methodName) use ($controllerClass): ReflectionMethod {
+            $method = $controllerClass->getMethod($methodName);
+            $method->setAccessible(true);
+
+            return $method;
+        };
+
+        $this->readFileMethod = $expose('readFileWithProperEncoding');
+        $this->isValidConversionMethod = $expose('isValidConversion');
+        $this->removeBOMMethod = $expose('removeBOM');
+    }
+
+    /**
+     * Test data with various Unicode blocks and international content
+     *
+     * Each entry maps a short label (used in assertion messages) to a UTF-8
+     * sample string for one script or symbol family.
+     *
+     * @return array<string, string>
+     */
+    private function getUnicodeTestData(): array
+    {
+        return [
+            // Basic Latin and Latin Extended
+            'latin_basic' => "Hello World! Company's data",
+            'latin_extended' => "Café résumé naïve piñata façade",
+            
+            // Greek
+            'greek' => "Καλημέρα κόσμε! Ελληνικά γράμματα",
+            
+            // Cyrillic
+            'cyrillic' => "Привет мир! Русский текст",
+            
+            // Arabic (RTL)
+            'arabic' => "مرحبا بالعالم! النص العربي",
+            
+            // Hebrew (RTL)
+            'hebrew' => "שלום עולם! טקסט עברי",
+            
+            // Chinese Simplified
+            'chinese_simplified' => "你好世界！简体中文",
+            
+            // Chinese Traditional
+            'chinese_traditional' => "你好世界！繁體中文",
+            
+            // Japanese (Hiragana, Katakana, Kanji)
+            'japanese' => "こんにちは世界！ひらがな・カタカナ・漢字",
+            
+            // Korean
+            'korean' => "안녕하세요 세계! 한국어 텍스트",
+            
+            // Mathematical symbols
+            'mathematical' => "∑∫∞±≤≥≠√∂∇∆",
+            
+            // Currency symbols
+            'currency' => "€£¥₹₽₨₩₪₦₡₸",
+            
+            // Emoji and symbols
+            'emoji' => "😀🌍🚀💻📊✨🎉🔥💡⭐",
+            
+            // Mixed scripts
+            'mixed_scripts' => "Hello мир 世界 🌍 café résumé",
+            
+            // Special Unicode cases
+            // zero_width embeds U+FEFF (the code point also used as a BOM) in the middle of the text
+            'zero_width' => "Text\u{200B}with\u{FEFF}zero\u{200C}width\u{200D}chars",
+            'combining' => "e\u{0301}a\u{0300}i\u{0302}o\u{0303}u\u{0308}", // é à î õ ü
+            
+            // Quotation marks and dashes
+            'punctuation' => "«quotes» \u{201C}smart\u{201D} \u{2018}quotes\u{2019} — – … ‚ „",
+        ];
+    }
+
+    /**
+     * Extended encoding list for comprehensive testing
+     *
+     * The names are passed to createTestFile(). 'UTF-8-BOM' is not an
+     * encoding name as such; createTestFile() treats it as UTF-8 content
+     * with a BOM prepended.
+     *
+     * @return string[]
+     */
+    private function getExtendedEncodings(): array
+    {
+        return [
+            // Unicode variants
+            'UTF-8',
+            'UTF-8-BOM',
+            'UTF-16BE',
+            'UTF-16LE',
+            'UTF-32BE',
+            'UTF-32LE',
+            
+            // ISO Latin variants (commonly supported)
+            'ISO-8859-1',   // Western European
+            'ISO-8859-2',   // Central European
+            'ISO-8859-5',   // Cyrillic
+            'ISO-8859-7',   // Greek
+            'ISO-8859-9',   // Turkish
+            'ISO-8859-15',  // Western European (with Euro)
+            
+            // Windows code pages (commonly supported)
+            'Windows-1251', // Cyrillic
+            'Windows-1252', // Western European
+            
+            // Other commonly supported encodings
+            // NOTE(review): CP1252 is presumably an alias of Windows-1252 — confirm it is handled as intended
+            'CP1252',       // Windows Western
+        ];
+    }
+
+    /**
+     * Create a test file with specific content and encoding
+     *
+     * 'UTF-8' is written as-is. 'UTF-8-BOM' and the UTF-16/UTF-32 variants are
+     * written with their byte order mark in front. Any other name is converted
+     * with mb_convert_encoding() when mbstring lists it; otherwise the content
+     * is written unchanged as UTF-8.
+     *
+     * @param string $content  UTF-8 text to store
+     * @param string $encoding Target encoding name, or the pseudo-name 'UTF-8-BOM'
+     * @return string Path of the temporary file; the caller is expected to delete it
+     */
+    private function createTestFile(string $content, string $encoding): string
+    {
+        $tempFile = tempnam(sys_get_temp_dir(), 'unicode_test_');
+
+        // Byte order marks for the variants that are always written with one.
+        $boms = [
+            'UTF-8-BOM' => "\xEF\xBB\xBF",
+            'UTF-16BE' => "\xFE\xFF",
+            'UTF-16LE' => "\xFF\xFE",
+            'UTF-32BE' => "\x00\x00\xFE\xFF",
+            'UTF-32LE' => "\xFF\xFE\x00\x00",
+        ];
+
+        // Default: the unconverted UTF-8 content (also the fallback below).
+        $bytes = $content;
+
+        if ($encoding === 'UTF-8-BOM') {
+            $bytes = $boms[$encoding] . $content;
+        } elseif (isset($boms[$encoding])) {
+            $bytes = $boms[$encoding] . mb_convert_encoding($content, $encoding, 'UTF-8');
+        } elseif ($encoding !== 'UTF-8' && in_array($encoding, mb_list_encodings(), true)) {
+            // NOTE(review): this lookup is case-sensitive and ignores aliases, so
+            // names such as 'WINDOWS-1252' or 'CP1252' presumably take the UTF-8
+            // fallback instead of being converted — confirm against mb_list_encodings().
+            try {
+                $bytes = mb_convert_encoding($content, $encoding, 'UTF-8');
+            } catch (\Exception | \ValueError $e) {
+                // Fully qualified on purpose: inside this namespace the bare names
+                // would resolve to Tests\Unit\Exception / Tests\Unit\ValueError and
+                // never match. On failure keep the UTF-8 fallback.
+                $bytes = $content;
+            }
+        }
+
+        file_put_contents($tempFile, $bytes);
+
+        return $tempFile;
+    }
+
+    /**
+     * Test 1: Unicode content preservation across different UTF encodings
+     *
+     * Every sample from getUnicodeTestData() is written in each UTF variant
+     * and must be read back identical to the original UTF-8 string.
+     */
+    public function testUnicodeContentPreservation()
+    {
+        $samples = $this->getUnicodeTestData();
+
+        foreach ($samples as $name => $content) {
+            foreach (['UTF-8', 'UTF-8-BOM', 'UTF-16BE', 'UTF-16LE', 'UTF-32BE', 'UTF-32LE'] as $encoding) {
+                $tempFile = $this->createTestFile($content, $encoding);
+
+                $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+
+                // Exact round trip
+                $this->assertEquals(
+                    $content,
+                    $result,
+                    "Unicode preservation failed for {$name} with {$encoding} encoding"
+                );
+
+                // The controller's own validator must agree
+                $isValid = $this->isValidConversionMethod->invoke($this->controller, $result);
+                $this->assertTrue($isValid, "Validation failed for {$name} with {$encoding} encoding");
+
+                unlink($tempFile);
+            }
+        }
+    }
+
+    /**
+     * Test 2: BOM handling for different UTF variants
+     *
+     * For each UTF variant a file is written as BOM + encoded content and must
+     * be read back as the plain UTF-8 text. removeBOM() is then checked
+     * directly on a UTF-8 string carrying a BOM.
+     */
+    public function testBOMHandlingForAllUTF()
+    {
+        $testContent = "Hello 世界! Тест العالم";
+
+        $bomTests = [
+            'UTF-8' => "\xEF\xBB\xBF",
+            'UTF-16BE' => "\xFE\xFF",
+            'UTF-16LE' => "\xFF\xFE",
+            'UTF-32BE' => "\x00\x00\xFE\xFF",
+            'UTF-32LE' => "\xFF\xFE\x00\x00",
+        ];
+
+        foreach ($bomTests as $encoding => $bom) {
+            // Build the fixture from $bom directly. createTestFile() writes plain
+            // 'UTF-8' without a BOM, which would leave the UTF-8 case untested.
+            $tempFile = tempnam(sys_get_temp_dir(), 'unicode_test_');
+            file_put_contents($tempFile, $bom . mb_convert_encoding($testContent, $encoding, 'UTF-8'));
+
+            try {
+                // Test file processing with BOM
+                $fileResult = $this->readFileMethod->invoke($this->controller, $tempFile);
+
+                $this->assertEquals(
+                    $testContent,
+                    $fileResult,
+                    "File processing with BOM failed for {$encoding}"
+                );
+
+                $this->assertTrue(
+                    $this->isValidConversionMethod->invoke($this->controller, $fileResult),
+                    "BOM file validation failed for {$encoding}"
+                );
+            } finally {
+                // Remove the fixture even when an assertion fails.
+                unlink($tempFile);
+            }
+        }
+
+        // Test UTF-8 BOM removal specifically (since that's what the method is designed for)
+        $utf8DataWithBOM = "\xEF\xBB\xBF" . $testContent;
+        $result = $this->removeBOMMethod->invoke($this->controller, $utf8DataWithBOM);
+
+        $this->assertEquals(
+            $testContent,
+            $result,
+            "UTF-8 BOM removal failed"
+        );
+    }
+
+    /**
+     * Test 3: Extended encoding compatibility
+     *
+     * Writes an ASCII-only sample in every encoding from
+     * getExtendedEncodings() and checks that the reader returns valid UTF-8
+     * with no U+FFFD replacement character.
+     */
+    public function testExtendedEncodingCompatibility()
+    {
+        // Use content that's compatible with most encodings
+        $basicContent = "Company data with special chars";
+        $accentContent = "Cafe resume naive facade"; // Without actual accents for broader compatibility
+
+        foreach ($this->getExtendedEncodings() as $encoding) {
+            // ASCII-only encodings get the basic sample, everything else the
+            // second one. Both samples are plain ASCII.
+            $content = $this->isAsciiCompatibleEncoding($encoding) ? $basicContent : $accentContent;
+
+            $tempFile = $this->createTestFile($content, $encoding);
+
+            try {
+                $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+
+                // Result should always be valid UTF-8
+                $this->assertTrue(
+                    mb_check_encoding($result, 'UTF-8'),
+                    "Result should be valid UTF-8 for encoding: {$encoding}"
+                );
+
+                // U+FFFD is spelled as its UTF-8 byte sequence so the check does
+                // not depend on how this source file is stored or rendered.
+                $this->assertFalse(
+                    str_contains($result, "\xEF\xBF\xBD"),
+                    "Result should not contain replacement characters for encoding: {$encoding}"
+                );
+
+                $this->assertTrue(
+                    $this->isValidConversionMethod->invoke($this->controller, $result),
+                    "Validation failed for encoding: {$encoding}"
+                );
+            } finally {
+                // Remove the fixture even when an assertion fails.
+                unlink($tempFile);
+            }
+        }
+    }
+
+    /**
+     * Test 4: Right-to-left (RTL) text handling
+     *
+     * Arabic, Hebrew and mixed-direction samples stored as UTF-8 must be read
+     * back unchanged.
+     */
+    public function testRightToLeftTextHandling()
+    {
+        $samples = [
+            'arabic' => "مرحبا بالعالم! شركة البيانات",
+            'hebrew' => "שלום עולם! חברת הנתונים",
+            'mixed_rtl' => "Hello مرحبا World עולם!",
+        ];
+
+        foreach ($samples as $label => $expected) {
+            $path = $this->createTestFile($expected, 'UTF-8');
+
+            $actual = $this->readFileMethod->invoke($this->controller, $path);
+
+            $this->assertEquals($expected, $actual, "RTL test failed for: {$label}");
+
+            $isValid = $this->isValidConversionMethod->invoke($this->controller, $actual);
+            $this->assertTrue($isValid, "RTL validation failed for: {$label}");
+
+            unlink($path);
+        }
+    }
+
+    /**
+     * Test 5: Asian character sets (CJK)
+     *
+     * Chinese, Japanese and Korean samples stored as UTF-8 must be read back
+     * unchanged.
+     */
+    public function testAsianCharacterSets()
+    {
+        $samples = [
+            'chinese_simplified' => "公司数据处理系统",
+            'chinese_traditional' => "公司資料處理系統",
+            'japanese_hiragana' => "かいしゃのでーたしすてむ",
+            'japanese_katakana' => "カイシャノデータシステム",
+            'japanese_kanji' => "会社のデータシステム",
+            'korean' => "회사 데이터 시스템",
+            'mixed_cjk' => "Company 公司 会社 회사 Data",
+        ];
+
+        foreach ($samples as $label => $expected) {
+            $path = $this->createTestFile($expected, 'UTF-8');
+
+            $actual = $this->readFileMethod->invoke($this->controller, $path);
+
+            $this->assertEquals($expected, $actual, "CJK test failed for: {$label}");
+
+            $isValid = $this->isValidConversionMethod->invoke($this->controller, $actual);
+            $this->assertTrue($isValid, "CJK validation failed for: {$label}");
+
+            unlink($path);
+        }
+    }
+
+    /**
+     * Test 6: Emoji and symbol handling
+     *
+     * Emoji, mathematical, currency, typographic and arrow samples stored as
+     * UTF-8 must be read back unchanged.
+     */
+    public function testEmojiAndSymbolHandling()
+    {
+        $samples = [
+            'basic_emoji' => "Data 📊 Reports 📈 Analysis 🔍",
+            'complex_emoji' => "👨‍💻👩‍💼🏢💼📋📊📈📉",
+            'mathematical' => "∑(x²) ∫f(x)dx ∞ ≠ ≤ ≥ ± √",
+            'currency_symbols' => "Price: €100 £80 ¥1000 $75",
+            'technical_symbols' => "® © ™ § ¶ † ‡ • ‰ ‱",
+            'arrows_symbols' => "← → ↑ ↓ ↔ ↕ ⇐ ⇒ ⇔",
+        ];
+
+        foreach ($samples as $label => $expected) {
+            $path = $this->createTestFile($expected, 'UTF-8');
+
+            $actual = $this->readFileMethod->invoke($this->controller, $path);
+
+            $this->assertEquals($expected, $actual, "Symbol test failed for: {$label}");
+
+            $isValid = $this->isValidConversionMethod->invoke($this->controller, $actual);
+            $this->assertTrue($isValid, "Symbol validation failed for: {$label}");
+
+            unlink($path);
+        }
+    }
+
+    /**
+     * Test 7: Combining characters and normalization
+     *
+     * Precomposed, decomposed and mixed accent samples are only required to
+     * come back as valid UTF-8 that passes isValidConversion; byte-for-byte
+     * equality is deliberately not asserted.
+     */
+    public function testCombiningCharacters()
+    {
+        $samples = [
+            'accents_composed' => "café résumé naïve",
+            'accents_decomposed' => "cafe\u{0301} re\u{0301}sume\u{0301} nai\u{0308}ve",
+            'mixed_normalization' => "café cafe\u{0301} résumé re\u{0301}sume\u{0301}",
+        ];
+
+        foreach ($samples as $label => $text) {
+            $path = $this->createTestFile($text, 'UTF-8');
+
+            $actual = $this->readFileMethod->invoke($this->controller, $path);
+
+            // Content should be preserved (normalization might occur but content should be valid)
+            $isUtf8 = mb_check_encoding($actual, 'UTF-8');
+            $this->assertTrue($isUtf8, "Combining character result should be valid UTF-8 for: {$label}");
+
+            $isValid = $this->isValidConversionMethod->invoke($this->controller, $actual);
+            $this->assertTrue($isValid, "Combining character validation failed for: {$label}");
+
+            unlink($path);
+        }
+    }
+
+    /**
+     * Test 8: Large Unicode content performance
+     *
+     * Reads a multi-script fixture and requires the read to finish in under
+     * two seconds, return the content unchanged and pass isValidConversion.
+     */
+    public function testLargeUnicodeContentPerformance()
+    {
+        $unicodePattern = "🌍 Hello 世界 مرحبا Здравствуй שלום こんにちは 안녕하세요 ";
+        $largeContent = str_repeat($unicodePattern, 1000); // 91 bytes per repetition, ~90KB in total
+
+        $tempFile = $this->createTestFile($largeContent, 'UTF-8');
+
+        try {
+            // hrtime() is monotonic, so the measurement cannot be skewed by a
+            // wall-clock adjustment while the read is running.
+            $startedAt = hrtime(true);
+            $result = $this->readFileMethod->invoke($this->controller, $tempFile);
+            $processingTime = (hrtime(true) - $startedAt) / 1e9; // nanoseconds -> seconds
+
+            $this->assertLessThan(2.0, $processingTime, "Large Unicode content processing should be fast");
+            $this->assertEquals($largeContent, $result, "Large Unicode content should be preserved");
+            $this->assertTrue(
+                $this->isValidConversionMethod->invoke($this->controller, $result),
+                "Large Unicode content validation failed"
+            );
+        } finally {
+            // Remove the fixture even when an assertion fails.
+            unlink($tempFile);
+        }
+    }
+
+    /**
+     * Test 9: Mixed encoding scenarios
+     *
+     * Each realistic sample (text with emoji, a CSV snippet, business names)
+     * is written under several encoding names. The result only has to be
+     * valid UTF-8 that passes isValidConversion.
+     */
+    public function testMixedEncodingScenarios()
+    {
+        // Simulate files that might have mixed encoding issues
+        $samples = [
+            'mostly_ascii_with_unicode' => "Regular text with émojis 😀 and symbols ™",
+            'csv_with_international' => "Name,Company,Location\n\"José García\",\"Café España\",\"São Paulo\"",
+            'business_names' => "McDonald's, L'Oréal, Nestlé, Björk & Co, Müller GmbH",
+        ];
+
+        // Every sample is tried with each of these encoding names
+        $encodings = ['UTF-8', 'UTF-8-BOM', 'WINDOWS-1252', 'ISO-8859-1'];
+
+        foreach ($samples as $name => $content) {
+            foreach ($encodings as $encoding) {
+                $path = $this->createTestFile($content, $encoding);
+
+                $actual = $this->readFileMethod->invoke($this->controller, $path);
+
+                $isUtf8 = mb_check_encoding($actual, 'UTF-8');
+                $this->assertTrue(
+                    $isUtf8,
+                    "Mixed encoding result should be valid UTF-8 for {$name} with {$encoding}"
+                );
+
+                $isValid = $this->isValidConversionMethod->invoke($this->controller, $actual);
+                $this->assertTrue(
+                    $isValid,
+                    "Mixed encoding validation failed for {$name} with {$encoding}"
+                );
+
+                unlink($path);
+            }
+        }
+    }
+
+    /**
+     * Helper: reports whether the given name is one of the ASCII-only
+     * encodings ('ASCII', 'US-ASCII').
+     *
+     * Despite the method name, this does not test for ASCII-compatible
+     * supersets such as UTF-8 or ISO-8859-1; those return false. Matching is
+     * exact and case-sensitive.
+     *
+     * @param string $encoding Encoding name to check
+     * @return bool True only for 'ASCII' or 'US-ASCII'
+     */
+    private function isAsciiCompatibleEncoding(string $encoding): bool
+    {
+        return in_array($encoding, ['ASCII', 'US-ASCII'], true);
+    }
+
+    /**
+     * Test 10: CSV data with international content
+     *
+     * A CSV with Spanish, Chinese, German, Russian and Arabic rows is written
+     * under several encoding names. The result must be valid UTF-8 and still
+     * contain the Spanish, Chinese and German names.
+     */
+    public function testCSVWithInternationalContent()
+    {
+        $rows = [
+            "Name,Company,City,Country,Notes",
+            "\"José García\",\"Café España\",\"São Paulo\",\"Brasil\",\"Açaí supplier\"",
+            "\"李小明\",\"北京科技公司\",\"北京\",\"中国\",\"Technology partner\"",
+            "\"Müller\",\"Bäckerei München\",\"München\",\"Deutschland\",\"Café & Bäckerei\"",
+            "\"Иванов\",\"Москва ООО\",\"Москва\",\"Россия\",\"Software development\"",
+            "\"محمد أحمد\",\"شركة الرياض\",\"الرياض\",\"السعودية\",\"Trading company\"",
+        ];
+        $csvContent = implode("\n", $rows);
+
+        // NOTE(review): Windows-1252 cannot represent the Chinese, Cyrillic or
+        // Arabic rows, so the 'WINDOWS-1252' pass presumably only succeeds if
+        // createTestFile() falls back to writing UTF-8 — confirm.
+        foreach (['UTF-8', 'UTF-8-BOM', 'WINDOWS-1252'] as $encoding) {
+            $path = $this->createTestFile($csvContent, $encoding);
+
+            $actual = $this->readFileMethod->invoke($this->controller, $path);
+
+            $isUtf8 = mb_check_encoding($actual, 'UTF-8');
+            $this->assertTrue($isUtf8, "CSV result should be valid UTF-8 for encoding: {$encoding}");
+
+            // Check that it contains expected international content
+            $this->assertStringContainsString("José García", $actual, "Should contain Spanish names");
+            $this->assertStringContainsString("李小明", $actual, "Should contain Chinese names");
+            $this->assertStringContainsString("Müller", $actual, "Should contain German names");
+
+            unlink($path);
+        }
+    }
+}