在这里备份一下：一个基于 MeCab 和 UniDic 的日语注音小工具。
可以给汉字标注平假名读音，给片假名外来语标注其原词。
这是之前做的，后来发现对自己学日语没什么帮助，项目就停掉了。
<?php
// Server side of this page: the raw body of a POST request is fed to the
// `mecab` command and the tokenizer's raw output is returned as plain text.
// Any other request method falls through to the HTML page below.
//
// Requires: apt install mecab unidic-mecab
if ($_SERVER['REQUEST_METHOD'] === 'POST') {
    $stdin = false;
    $stderr = false;

    // Guarded because header() raises a warning if output has already started.
    if (!headers_sent()) {
        header('Content-Type: text/plain; charset=UTF-8');
    }

    try {
        $input = file_get_contents('php://input');
        if ($input === false) {
            throw new Exception('Failed to read request body');
        }

        // stdin and stderr are backed by temporary files and only stdout is a
        // pipe. Writing the whole input into a pipe before draining stdout (or
        // draining stdout before stderr) can block both sides forever once a
        // pipe buffer fills up; with a single pipe that cannot happen.
        $stdin = tmpfile();
        $stderr = tmpfile();
        if ($stdin === false || $stderr === false) {
            throw new Exception('Failed to create temporary file');
        }
        fwrite($stdin, $input);
        fflush($stdin);
        // The child shares the file offset, so it must start reading at 0.
        rewind($stdin);

        $process = proc_open('mecab', [0 => $stdin, 1 => ['pipe', 'w'], 2 => $stderr], $pipes);
        if (!is_resource($process)) {
            throw new Exception('Failed to open process');
        }

        $result = stream_get_contents($pipes[1]);
        fclose($pipes[1]);
        $exitCode = proc_close($process);

        rewind($stderr);
        $error = stream_get_contents($stderr);

        // As before, anything on stderr is treated as a failure.
        if (!empty($error)) {
            throw new Exception($error);
        }
        if ($result === false) {
            throw new Exception('Failed to read mecab output');
        }
        if ($exitCode !== 0) {
            throw new Exception('mecab exited with status ' . $exitCode);
        }

        http_response_code(200);
        echo $result;
    } catch (Exception $e) {
        http_response_code(500);
        // Only the message: echoing the exception object would also expose the
        // stack trace and server file paths to the client.
        echo $e->getMessage();
    } finally {
        foreach ([$stdin, $stderr] as $handle) {
            if (is_resource($handle)) {
                fclose($handle);
            }
        }
    }
    exit;
}
?>
<!DOCTYPE html>
<html>
<head>
<!-- Declare UTF-8 so the Japanese/Chinese text on the page is decoded correctly. -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- Empty data: URL as the icon; presumably there to stop the browser requesting /favicon.ico. -->
<link rel="icon" href="data:," />
<title>JPhonetic - MeCab + UniDic</title>
</head>
<body>
<!-- Text to annotate is typed or pasted here. -->
<textarea id="inputText" style="display:block;width:600px;max-width:calc(100% - 4px);height:200px;"></textarea>
<!-- Button: passes the textarea's current value and the #text output element to notation(). -->
<input type="button"
onclick="notation(document.querySelector('#inputText').value, document.querySelector('#text'));" value="注音" />
<!-- File picker: the script below registers a "change" listener on #fileInput. -->
<input type="file" id="fileInput" /><br /><br />
<!-- Output container written to by notation() and by the file-input listener. -->
<div id="text"></div>
<script>
/**
 * Replace every character in U+30A1..U+30F6 with the character 0x60 code
 * units below it, which turns katakana into the matching hiragana.
 * Characters outside that range (including the long-vowel mark) are kept.
 *
 * @param {string} str Text that may contain katakana.
 * @returns {string} The text with those katakana converted.
 */
function hiragana(str) {
    const shiftDown = (kana) => {
        const code = kana.charCodeAt(0) - 0x60;
        return String.fromCharCode(code);
    };
    return str.replace(/[\u30A1-\u30F6]/g, shiftDown);
}
/**
 * Send `text` to this page's own URL (the MeCab endpoint) and render the
 * annotated result into `dom` as plain text.
 *
 * Each tab-separated row of the response is a surface form followed by a
 * comma-separated feature list. Rows without a tab (such as "EOS" or blank
 * lines) are skipped. For the remaining rows:
 *  - a word containing kanji, with a non-empty feature 20, becomes
 *    " word[feature 20 in hiragana] ";
 *  - an all-katakana word whose feature 7 has a "-suffix" becomes
 *    " word[suffix] ";
 *  - a word made only of ASCII letters, digits and Unicode punctuation is
 *    padded with a space on each side;
 *  - anything else is copied through unchanged.
 * NOTE(review): features 20 and 7 look like UniDic's kana reading and lemma
 * columns; confirm against the installed dictionary's output format.
 *
 * On failure (network error or non-2xx response) the error is logged and its
 * message is shown in `dom`.
 *
 * @param {string} text Text to annotate; nothing is sent when it is empty.
 * @param {Element} dom Element that receives the result.
 * @returns {Promise<void>} Settles once `dom` has been updated; callers may ignore it.
 */
function notation(text, dom) {
    if (!text) { return Promise.resolve(); }

    // Split only on commas outside double-quoted fields, i.e. commas followed
    // by an even number of quotes up to the end of the line.
    const FIELD_SEPARATOR = /,(?=(?:[^"]*"[^"]*")*[^"]*$)/;

    // Turn the raw tokenizer output into the annotated string.
    const annotate = (data) => {
        let result = '';
        for (const row of data.split(/\r?\n/)) {
            const values = row.split('\t');
            if (values.length <= 1) { continue; }
            const word = values[0];
            const part = values[1].split(FIELD_SEPARATOR);
            const yomi = part[20] ?? '';
            const origin = part[7]?.split('-')[1];
            if (/[\u4E00-\u9FFF\u3400-\u4DBF]/.test(word) && yomi) {
                result += " " + word + "[" + hiragana(yomi) + "] ";
            } else if (/^[\u30A0-\u30FF]+$/.test(word) && origin) {
                result += " " + word + "[" + origin + "] ";
            } else if (/^[A-Za-z0-9\p{P}]+$/u.test(word)) {
                result += " " + word + " ";
            } else {
                result += word;
            }
        }
        return result;
    };

    return fetch('', {
        method: 'POST',
        headers: { 'Content-Type': 'text/plain' },
        body: text
    })
        .then(response => response.text().then(body => {
            // A failed request carries an error message, not tokenizer output.
            if (!response.ok) { throw new Error(body || 'HTTP ' + response.status); }
            return body;
        }))
        .then(data => {
            // textContent, not innerHTML: the text comes from user or OCR
            // input and must not be interpreted as markup.
            dom.textContent = annotate(data);
        })
        .catch(error => {
            console.error('Error:', error);
            dom.textContent = error?.message ?? String(error);
        });
}
// When a file is chosen, upload it to the ocr.space parse endpoint and pass
// the recognised text to notation(); progress and failures are shown in #text.
document.getElementById("fileInput").addEventListener("change", async function () {
    const file = this.files[0];
    if (!file) return;

    const output = document.querySelector("#text");

    // Show a coloured status line. The message is set via textContent so that
    // API payloads and error messages cannot inject markup into the page.
    const showStatus = (color, message) => {
        const span = document.createElement("span");
        span.style.color = color;
        span.textContent = message;
        output.textContent = "";
        output.appendChild(span);
    };

    // Form fields sent alongside the file, in the original order.
    const ocrOptions = {
        url: "",
        language: "auto",
        isOverlayRequired: "true",
        FileType: ".Auto",
        IsCreateSearchablePDF: "false",
        isSearchablePdfHideTextLayer: "true",
        detectOrientation: "false",
        isTable: "false",
        scale: "true",
        OCREngine: "3",
        detectCheckbox: "false",
        checkboxTemplate: "0",
    };
    const formData = new FormData();
    formData.append("file", file);
    for (const [name, value] of Object.entries(ocrOptions)) {
        formData.append(name, value);
    }

    showStatus("blue", "Loading...");
    try {
        const res = await fetch("https://api8.ocr.space/parse/image", {
            method: "POST",
            // "origin" and "referer" are no longer set: they are forbidden
            // header names that browsers discard from fetch() requests.
            // NOTE(review): the API key is hard-coded and visible to every
            // visitor; confirm it is meant to be used from this page.
            headers: {
                "accept": "application/json, text/javascript, */*; q=0.01",
                "apikey": "donotstealthiskey_ip1",
            },
            body: formData
        });
        const json = await res.json();
        const parsedText = json?.ParsedResults?.[0]?.ParsedText;
        if (parsedText) {
            notation(parsedText, output);
        } else {
            // No text recognised: show the raw response for diagnosis.
            showStatus("red", JSON.stringify(json));
        }
    } catch (error) {
        showStatus("red", String(error?.message ?? error));
    }
});
</script>
</body>
</html>