fix: #I7VP0I 遍历文件编码时,取概率相同,但最先出现的

This commit is contained in:
songdragon 2023-08-24 23:08:31 +08:00
parent c678fda573
commit 8a931725bf

View File

@ -23,6 +23,10 @@ import static org.jcnc.jnotepad.constants.TextConstants.UNKNOWN;
public class EncodingDetector {
private static final Logger LOG = LogUtil.getLogger(EncodingDetector.class);
/**
* Confidence threshold for encoding detection: 50 (i.e. 50%).
* <p>
* The detection code compares each {@code CharsetMatch#getConfidence()} value
* against this constant; a candidate whose confidence is below it is not accepted.
*/
public static final int THRESHOLD_CONFIDENCE = 50;
private EncodingDetector() {
@ -39,10 +43,20 @@ public class EncodingDetector {
try (BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(file.getPath()))) {
charsetDetector.setText(inputStream);
CharsetMatch[] matchList = charsetDetector.detectAll();
for (CharsetMatch match : matchList) {
if (matchList == null || matchList.length == 0) {
return UNKNOWN;
}
CharsetMatch maxConfidence = matchList[0];
if (maxConfidence.getConfidence() < THRESHOLD_CONFIDENCE) {
return UNKNOWN;
}
for (int i = 1; i < matchList.length; i++) {
CharsetMatch match = matchList[i];
LOG.debug("{} : {}", match.getName(), match.getConfidence());
if (match.getConfidence() > 50) {
return match.getName();
if (match.getConfidence() >= THRESHOLD_CONFIDENCE && match.getConfidence() >= maxConfidence.getConfidence()) {
maxConfidence = match;
} else {
return maxConfidence.getName();
}
}
} catch (Exception e) {