diff --git a/src/main/java/org/jcnc/jnotepad/tool/EncodingDetector.java b/src/main/java/org/jcnc/jnotepad/tool/EncodingDetector.java index dc0fac6..b601e40 100644 --- a/src/main/java/org/jcnc/jnotepad/tool/EncodingDetector.java +++ b/src/main/java/org/jcnc/jnotepad/tool/EncodingDetector.java @@ -23,6 +23,10 @@ import static org.jcnc.jnotepad.constants.TextConstants.UNKNOWN; public class EncodingDetector { private static final Logger LOG = LogUtil.getLogger(EncodingDetector.class); + /** + * Confidence threshold (percent) for charset detection: 50. A best match below this value is reported as UNKNOWN. + */ + public static final int THRESHOLD_CONFIDENCE = 50; private EncodingDetector() { @@ -39,10 +43,20 @@ public class EncodingDetector { try (BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(file.getPath()))) { charsetDetector.setText(inputStream); CharsetMatch[] matchList = charsetDetector.detectAll(); - for (CharsetMatch match : matchList) { + if (matchList == null || matchList.length == 0) { + return UNKNOWN; + } + CharsetMatch maxConfidence = matchList[0]; + if (maxConfidence.getConfidence() < THRESHOLD_CONFIDENCE) { + return UNKNOWN; + } + for (int i = 1; i < matchList.length; i++) { /* NOTE(review): the only return inside this loop is in the else branch; when matchList has a single element, or every later match satisfies the if, nothing is returned from the loop. Confirm that the code after this try block returns maxConfidence.getName() in that case. */ + CharsetMatch match = matchList[i]; LOG.debug("{} : {}", match.getName(), match.getConfidence()); - if (match.getConfidence() > 50) { - return match.getName(); + if (match.getConfidence() >= THRESHOLD_CONFIDENCE && match.getConfidence() >= maxConfidence.getConfidence()) { + maxConfidence = match; + } else { + return maxConfidence.getName(); } } } catch (Exception e) {