217 lines
6.6 KiB
PowerShell
217 lines
6.6 KiB
PowerShell
# Generates one word-bank JSON file per language under $OutputDir. The Spanish,
# English and French banks are copied from existing assets; every other
# language is translated from the Spanish list, then all files are validated.
param(
    # Directory that receives the generated palabras_<lang>.json files.
    [ValidateNotNullOrEmpty()]
    [string]$OutputDir = 'assets/words',

    # Number of terms sent per translation request. Must be at least 1: the
    # batching loop advances by this amount, so 0 would never terminate.
    [ValidateRange(1, [int]::MaxValue)]
    [int]$BatchSize = 100
)

# Promote every error to a terminating one so a failed step aborts the run.
$ErrorActionPreference = 'Stop'
|
||
|
||
# Maps each app locale key (used in generated file names) to the language code
# passed to the translation request. Insertion order is preserved.
$langMap = [ordered]@{}
foreach ($code in 'ar', 'ca', 'de', 'en', 'es', 'eu', 'fr', 'hi', 'it', 'ja', 'ko', 'nl', 'pl', 'pt', 'ru', 'tr') {
    # For these locales the key and the translation code are identical.
    $langMap[$code] = $code
}
# The Chinese variants use region-qualified translation codes.
$langMap['zh'] = 'zh-CN'
$langMap['zh_TW'] = 'zh-TW'
|
||
|
||
# Word-bank category name -> property name of that category's label in the
# per-language ARB files. Order here is the order categories are written.
$categoryKeyMap = [ordered]@{
    'animales'    = 'categoryAnimals'
    'comida'      = 'categoryFood'
    'paises'      = 'categoryCountries'
    'deportes'    = 'categorySports'
    'profesiones' = 'categoryProfessions'
    'objetos'     = 'categoryObjects'
    'lugares'     = 'categoryPlaces'
    'peliculas'   = 'categoryMovies'
    'musica'      = 'categoryMusic'
    'tecnologia'  = 'categoryTechnology'
}
|
||
|
||
# Word-bank category name -> short English label that is prefixed to each
# Spanish term ("<label>: <term>") before it is sent for translation.
$contextMap = [ordered]@{
    'animales'    = 'animal'
    'comida'      = 'food'
    'paises'      = 'country'
    'deportes'    = 'sport'
    'profesiones' = 'profession'
    'objetos'     = 'object'
    'lugares'     = 'place'
    'peliculas'   = 'movie'
    'musica'      = 'music'
    'tecnologia'  = 'technology'
}
|
||
|
||
# Strict UTF-8: emits no byte-order mark and throws on invalid byte sequences.
$utf8Strict = New-Object -TypeName System.Text.UTF8Encoding -ArgumentList $false, $true
# UTF-8 without a byte-order mark; used when writing the output files.
$utf8NoBom = New-Object -TypeName System.Text.UTF8Encoding -ArgumentList $false
|
||
|
||
# Reads a UTF-8 encoded JSON file and returns the parsed object.
#   $path - file to read; relative paths resolve against the current location.
# Decoding goes through $utf8Strict, so malformed UTF-8 throws instead of being
# silently replaced. A leading byte-order mark is dropped before parsing.
function Read-Utf8Json([string]$path) {
    # .NET file APIs need a native file-system path. ProviderPath supplies one
    # even when the current location is on a custom PSDrive, and -LiteralPath
    # keeps characters such as [ ] from being interpreted as wildcards.
    $nativePath = (Resolve-Path -LiteralPath $path).ProviderPath
    $bytes = [System.IO.File]::ReadAllBytes($nativePath)
    $text = $utf8Strict.GetString($bytes).TrimStart([char]0xFEFF)
    return $text | ConvertFrom-Json
}
|
||
|
||
# Serializes $obj as JSON (depth 10) and writes it to $path as UTF-8 without a
# byte-order mark, followed by a single trailing newline.
#   $obj  - object to serialize.
#   $path - destination file, absolute or relative to the current location.
# Missing parent directories are created.
function Write-Utf8Json([object]$obj, [string]$path) {
    # Resolve to a native path without requiring the file to exist. Unlike
    # joining onto the current location, this leaves absolute paths intact.
    $full = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($path)
    $dir = Split-Path -Parent $full
    if ($dir -and -not (Test-Path -LiteralPath $dir)) {
        New-Item -ItemType Directory -Force -Path $dir | Out-Null
    }
    $json = $obj | ConvertTo-Json -Depth 10
    [System.IO.File]::WriteAllText($full, $json + "`n", $utf8NoBom)
}
|
||
|
||
# Removes a leading "<label>:" prefix (1-30 non-colon characters followed by a
# colon) from $value and returns the trimmed remainder. Values without such a
# prefix are returned trimmed but otherwise unchanged.
#   $value - text to clean; $null is treated as an empty string.
function Strip-Context([string]$value) {
    # Accept the full-width colon (U+FF1A) as well as ':', since CJK text uses
    # it in place of the ASCII colon. It is written as an escape so the pattern
    # does not depend on how the script file itself is encoded.
    $labelPrefix = '^\s*[^:\uFF1A]{1,30}\s*[:\uFF1A]\s*'
    # -replace returns the input untouched when the pattern does not match.
    return ($value.Trim() -replace $labelPrefix, '').Trim()
}
|
||
|
||
# Private helper: sends one Spanish -> $target translation request for $text and
# returns the translated text. The first element of the response is read as a
# list of segments whose first entries are concatenated.
function Invoke-TranslateRequest([string]$text, [string]$target) {
    $query = [uri]::EscapeDataString($text)
    $url = "https://translate.googleapis.com/translate_a/single?client=gtx&sl=es&tl=$target&dt=t&q=$query"
    $response = Invoke-RestMethod -Uri $url -TimeoutSec 45
    return (($response[0] | ForEach-Object { $_[0] }) -join '')
}

# Translates $terms from Spanish into $target and returns one cleaned string per
# input term, in input order.
#   $terms  - terms to translate (typically "<context>: <word>").
#   $target - language code placed in the request's "tl" parameter.
# First attempts a single request containing all terms as numbered lines. If
# that request fails, or its reply cannot be mapped back onto every term, each
# term is translated on its own with up to 4 attempts; the 4th failure is
# rethrown.
function Translate-Batch([string[]]$terms, [string]$target) {
    if ($terms.Count -eq 0) { return @() }

    # Tag every term with its index so the reply can be split apart again.
    $numbered = for ($i = 0; $i -lt $terms.Count; $i++) { "[$i] $($terms[$i])" }

    try {
        $translated = Invoke-TranslateRequest ($numbered -join "`n") $target
        # Deliberately not named $matches: that is a PowerShell automatic variable.
        $found = [regex]::Matches(
            $translated,
            '(?s)\[(\d+)\]\s*(.*?)(?=\s*\[\d+\]\s*|$)'
        )
        if ($found.Count -eq $terms.Count) {
            $out = New-Object string[] $terms.Count
            foreach ($item in $found) {
                $index = [int]$item.Groups[1].Value
                if ($index -ge 0 -and $index -lt $terms.Count) {
                    $out[$index] = Strip-Context $item.Groups[2].Value
                }
            }
            # @(...) is required: without it a single $null slot collapses to
            # $null, whose Count is 0, and an incomplete batch would be accepted.
            $unfilled = @($out | Where-Object { [string]::IsNullOrWhiteSpace($_) })
            if ($unfilled.Count -eq 0) {
                return $out
            }
        }
    } catch {
        # Best effort only: pause briefly, then fall through to per-term requests.
        Start-Sleep -Milliseconds 250
    }

    $results = New-Object System.Collections.Generic.List[string]
    foreach ($term in $terms) {
        $single = $null
        for ($attempt = 1; $attempt -le 4; $attempt++) {
            try {
                $single = Invoke-TranslateRequest $term $target
                break
            } catch {
                if ($attempt -eq 4) { throw }
                # Wait a little longer after each failed attempt.
                Start-Sleep -Milliseconds (250 * $attempt)
            }
        }
        $results.Add((Strip-Context $single))
        # Small gap between consecutive requests.
        Start-Sleep -Milliseconds 35
    }
    return $results.ToArray()
}
|
||
|
||
# Existing word lists for the three languages whose banks are copied as-is.
$sourceEs = Read-Utf8Json -path 'assets/palabras.json'
$sourceEn = Read-Utf8Json -path 'assets/palabras_en.json'
$sourceFr = Read-Utf8Json -path 'assets/palabras_fr.json'

# Parsed ARB localization file for every locale key in $langMap.
$arbByLang = @{}
foreach ($code in $langMap.Keys) {
    $arbPath = "lib/l10n/app_${code}.arb"
    $arbByLang[$code] = Read-Utf8Json -path $arbPath
}
|
||
|
||
# Builds the in-memory word bank for one locale.
#   $lang - locale key; must have an entry in $arbByLang.
# Returns an ordered dictionary with version = 2, idioma = $lang and one entry
# per category in $categoryKeyMap. Each entry carries the localized label
# ("pista") from the locale's ARB data and a word list ("palabras") that is
# filled from the es/en/fr source lists and left empty for any other locale.
function New-LanguageBank([string]$lang) {
    # Locales that have their own source list; hashtable lookup is
    # case-insensitive, like the comparison it replaces.
    $sourceByLang = @{ es = $sourceEs; en = $sourceEn; fr = $sourceFr }

    $categories = [ordered]@{}
    foreach ($category in $categoryKeyMap.Keys) {
        $wordList = @()
        if ($sourceByLang.ContainsKey($lang)) {
            $wordList = @($sourceByLang[$lang].categorias.$category)
        }

        $labelKey = $categoryKeyMap[$category]
        $entry = [ordered]@{}
        $entry['pista'] = [string]$arbByLang[$lang].$labelKey
        $entry['palabras'] = @($wordList | ForEach-Object { [string]$_ })
        $categories[$category] = $entry
    }

    $bank = [ordered]@{}
    $bank['version'] = 2
    $bank['idioma'] = $lang
    $bank['categorias'] = $categories
    return $bank
}
|
||
|
||
# Languages with their own source list need no translation: build each bank
# and write it straight to the output directory.
foreach ($sourceLang in 'es', 'en', 'fr') {
    $destination = Join-Path $OutputDir "palabras_${sourceLang}.json"
    $sourceBank = New-LanguageBank $sourceLang
    Write-Utf8Json $sourceBank $destination
}
|
||
|
||
# Every locale other than es/en/fr is produced by translating the Spanish list.
$targets = @($langMap.Keys | Where-Object { @('es', 'en', 'fr') -notcontains $_ })
foreach ($lang in $targets) {
    Write-Host "Generating $lang..."
    $bank = New-LanguageBank $lang
    $targetCode = $langMap[$lang]

    foreach ($category in $categoryKeyMap.Keys) {
        $spanishWords = @($sourceEs.categorias.$category)
        $context = $contextMap[$category]
        $translatedWords = [System.Collections.Generic.List[string]]::new()

        # Walk the Spanish list in slices of $BatchSize terms.
        $offset = 0
        while ($offset -lt $spanishWords.Count) {
            # Exclusive end of the current slice, clamped to the list length.
            $stop = [Math]::Min($offset + $BatchSize, $spanishWords.Count)

            $terms = [System.Collections.Generic.List[string]]::new()
            for ($index = $offset; $index -lt $stop; $index++) {
                # Prefix the category label so the term is translated in context.
                $terms.Add("${context}: $($spanishWords[$index])")
            }

            foreach ($word in @(Translate-Batch $terms.ToArray() $targetCode)) {
                $translatedWords.Add($word)
            }

            # Short pause between batches.
            Start-Sleep -Milliseconds 70
            $offset += $BatchSize
        }

        $bank.categorias[$category].palabras = $translatedWords.ToArray()
    }

    Write-Utf8Json $bank (Join-Path $OutputDir "palabras_$lang.json")
}
|
||
|
||
Write-Host 'Validating UTF-8 and sample accents...'
# Re-read every generated file and verify that it is valid UTF-8, carries no
# byte-order mark, and has as many words per category as the Spanish source.
foreach ($lang in $langMap.Keys) {
    $file = Join-Path $OutputDir "palabras_$lang.json"
    # ProviderPath yields a native path that .NET can open even when the
    # location is on a custom PSDrive; -LiteralPath disables wildcard matching.
    $bytes = [System.IO.File]::ReadAllBytes((Resolve-Path -LiteralPath $file).ProviderPath)

    # The strict decoder throws on any invalid byte sequence; the decoded text
    # itself is not needed here.
    $null = $utf8Strict.GetString($bytes)

    # 239 187 191 = EF BB BF, the UTF-8 byte-order mark.
    if ($bytes.Length -ge 3 -and $bytes[0] -eq 239 -and $bytes[1] -eq 187 -and $bytes[2] -eq 191) {
        throw "Unexpected UTF-8 BOM in $file"
    }

    $bank = Read-Utf8Json $file
    foreach ($category in $categoryKeyMap.Keys) {
        $expected = @($sourceEs.categorias.$category).Count
        $actual = @($bank.categorias.$category.palabras).Count
        if ($actual -ne $expected) { throw "Word count mismatch in $file / $category. Expected $expected, got $actual" }
    }
}
|
||
|
||
# Spot check that accented characters survived the write/read round trip: the
# Spanish animal list must still contain the accented sample word.
$spanishBank = Read-Utf8Json (Join-Path $OutputDir 'palabras_es.json')
$animalWords = @($spanishBank.categorias.animales.palabras)
if ($animalWords -notcontains 'León') {
    throw 'León accent validation failed'
}

Write-Host "OK: generated split word banks in $OutputDir"
|