mirror of
				https://kkgithub.com/actions/setup-python.git
				synced 2025-11-04 12:44:05 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			303 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			303 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
"use strict";
 | 
						|
const whatwgEncoding = require("whatwg-encoding");
 | 
						|
 | 
						|
// https://html.spec.whatwg.org/#encoding-sniffing-algorithm
 | 
						|
module.exports = function sniffHTMLEncoding(buffer, options) {
 | 
						|
  let encoding = whatwgEncoding.getBOMEncoding(buffer); // see https://github.com/whatwg/html/issues/1910
 | 
						|
 | 
						|
  if (options === undefined) {
 | 
						|
    options = {};
 | 
						|
  }
 | 
						|
 | 
						|
  if (encoding === null && options.transportLayerEncodingLabel !== undefined) {
 | 
						|
    encoding = whatwgEncoding.labelToName(options.transportLayerEncodingLabel);
 | 
						|
  }
 | 
						|
 | 
						|
  if (encoding === null) {
 | 
						|
    encoding = prescanMetaCharset(buffer);
 | 
						|
  }
 | 
						|
 | 
						|
  if (encoding === null && options.defaultEncoding !== undefined) {
 | 
						|
    encoding = options.defaultEncoding;
 | 
						|
  }
 | 
						|
 | 
						|
  if (encoding === null) {
 | 
						|
    encoding = "windows-1252";
 | 
						|
  }
 | 
						|
 | 
						|
  return encoding;
 | 
						|
};
 | 
						|
 | 
						|
// https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding
 | 
						|
/**
 * Scans the first bytes of a document (at most 1024) for a `<meta>` element
 * that declares a character encoding.
 *
 * Comments, ordinary tags (with their attributes) and `<!…>`, `</…>`, `<?…>`
 * constructs are skipped. For each `<meta` tag the attributes are read with
 * `getAttribute`; a `charset` attribute is accepted directly, while an
 * encoding found in `content` is only accepted when the tag also carries
 * `http-equiv="content-type"`.
 *
 * @param {Uint8Array|Buffer} buffer - Bytes of the document.
 * @returns {?string} The declared encoding name, with UTF-16LE/UTF-16BE
 *   replaced by "UTF-8" and "x-user-defined" replaced by "windows-1252",
 *   or `null` when no usable declaration is found.
 */
function prescanMetaCharset(buffer) {
  const l = Math.min(buffer.length, 1024);
  let i = 0;

  // Every branch below sets `i` to the exact index where scanning resumes;
  // there is no implicit increment, so no byte following a tag is skipped.
  while (i < l) {
    if (buffer[i] !== 0x3C) {
      // Not "<": nothing to look at here.
      i++;
      continue;
    }

    const c1 = buffer[i + 1];
    const c2 = buffer[i + 2];
    const c3 = buffer[i + 3];
    const c4 = buffer[i + 4];
    const c5 = buffer[i + 5];

    if (c1 === 0x21 && c2 === 0x2D && c3 === 0x2D) {
      // "<!--": skip to just past the first "-->". The search starts at the
      // opening dashes because they may double as the closing ones ("<!-->").
      let end = i + 2;
      while (end + 2 < l &&
             !(buffer[end] === 0x2D && buffer[end + 1] === 0x2D && buffer[end + 2] === 0x3E)) {
        end++;
      }
      i = end + 2 < l ? end + 3 : l;
      continue;
    }

    const isMetaTag = (c1 === 0x4D || c1 === 0x6D) &&
                      (c2 === 0x45 || c2 === 0x65) &&
                      (c3 === 0x54 || c3 === 0x74) &&
                      (c4 === 0x41 || c4 === 0x61) &&
                      (isSpaceCharacter(c5) || c5 === 0x2F);

    if (isMetaTag) {
      // "<meta" followed by a space or "/": move past those six bytes.
      i += 6;
      let gotPragma = false;
      let needPragma = null;
      let charset = null;

      let attrRes;
      do {
        attrRes = getAttribute(buffer, i, l);
        if (attrRes.attr) {
          const attrName = attrRes.attr.name;
          const attrValue = attrRes.attr.value;
          if (attrName === "http-equiv") {
            gotPragma = attrValue === "content-type";
          } else if (attrName === "content" && !charset) {
            charset = extractCharacterEncodingFromMeta(attrValue);
            if (charset !== null) {
              needPragma = true;
            }
          } else if (attrName === "charset") {
            charset = whatwgEncoding.labelToName(attrValue);
            needPragma = false;
          }
        }
        i = attrRes.i;
      } while (attrRes.attr);

      // Reject this tag and keep scanning when it declared nothing, when a
      // `content` declaration lacks the content-type pragma, or when the
      // label did not resolve to an encoding.
      if (needPragma === null || (needPragma === true && gotPragma === false) || charset === null) {
        continue;
      }

      if (charset === "UTF-16LE" || charset === "UTF-16BE") {
        return "UTF-8";
      }
      if (charset === "x-user-defined") {
        return "windows-1252";
      }
      return charset;
    }

    if ((c1 >= 0x41 && c1 <= 0x5A) || (c1 >= 0x61 && c1 <= 0x7A)) {
      // "<" + ASCII letter: skip the tag name up to a space or ">" ...
      i += 2;
      while (i < l && !isSpaceCharacter(buffer[i]) && buffer[i] !== 0x3E) {
        i++;
      }
      // ... then consume (and discard) all of the tag's attributes.
      let attrRes;
      do {
        attrRes = getAttribute(buffer, i, l);
        i = attrRes.i;
      } while (attrRes.attr);
      continue;
    }

    if (c1 === 0x21 || c1 === 0x2F || c1 === 0x3F) {
      // "<!", "</" or "<?": skip to just past the next ">".
      i += 2;
      while (i < l && buffer[i] !== 0x3E) {
        i++;
      }
      i++;
      continue;
    }

    // A lone "<" that starts nothing recognisable.
    i++;
  }

  return null;
}
 | 
						|
 | 
						|
// https://html.spec.whatwg.org/multipage/syntax.html#concept-get-attributes-when-sniffing
 | 
						|
/**
 * Reads one attribute from inside a tag while sniffing, starting at index `i`
 * and never looking at bytes at or beyond index `l`.
 *
 * ASCII upper-case letters in the name and the value are lower-cased; every
 * other byte is converted with `String.fromCharCode` unchanged.
 *
 * @param {Uint8Array|Buffer} buffer - Bytes being scanned.
 * @param {number} i - Index at which to start reading.
 * @param {number} l - Exclusive upper bound for reading.
 * @returns {{attr?: {name: string, value: string}, i: number}}
 *   With `attr` when an attribute was read; `i` is then where the next read
 *   should start (just past a closing quote, otherwise on the byte that
 *   ended the attribute). Without `attr` when the tag ended or the bytes ran
 *   out; on a ">" `i` is left pointing at that ">" so that the caller's own
 *   advance moves past it exactly once.
 */
function getAttribute(buffer, i, l) {
  // Skip separators (whitespace and "/") in front of the attribute.
  while (i < l && (isSpaceCharacter(buffer[i]) || buffer[i] === 0x2F)) {
    i++;
  }
  // Out of bytes, or ">" closes the tag: there is no attribute to report.
  if (i >= l || buffer[i] === 0x3E) {
    return { i };
  }

  let name = "";
  let value = "";

  // --- attribute name ---
  for (; i < l; i++) {
    const c = buffer[i];

    // "=" ends the name, but only once the name is non-empty; a leading "="
    // is treated as part of the name.
    if (c === 0x3D && name !== "") {
      i++;
      break;
    }

    if (isSpaceCharacter(c)) {
      // Whitespace after the name: a value follows only if the next
      // non-space byte is "=".
      do {
        i++;
      } while (i < l && isSpaceCharacter(buffer[i]));
      if (i >= l) {
        return { i };
      }
      if (buffer[i] !== 0x3D) {
        return { attr: { name, value }, i };
      }
      i++;
      break;
    }

    // "/" or ">" ends a value-less attribute.
    if (c === 0x2F || c === 0x3E) {
      return { attr: { name, value }, i };
    }

    name += byteToLowerCaseChar(c);
  }

  // --- attribute value ---
  while (i < l && isSpaceCharacter(buffer[i])) {
    i++;
  }
  if (i >= l) {
    return { i };
  }

  let c = buffer[i];

  if (c === 0x22 || c === 0x27) {
    // Quoted value: everything up to the matching quote.
    const quote = c;
    for (i++; i < l; i++) {
      c = buffer[i];
      if (c === quote) {
        return { attr: { name, value }, i: i + 1 };
      }
      value += byteToLowerCaseChar(c);
    }
    // The closing quote never arrived within the limit.
    return { i };
  }

  if (c === 0x3E) {
    // "name=>": empty value.
    return { attr: { name, value }, i };
  }

  // Unquoted value: everything up to whitespace or ">".
  value += byteToLowerCaseChar(c);
  for (i++; i < l; i++) {
    c = buffer[i];
    if (isSpaceCharacter(c) || c === 0x3E) {
      return { attr: { name, value }, i };
    }
    value += byteToLowerCaseChar(c);
  }

  return { i };
}

// Converts one byte to a one-character string, mapping A-Z to a-z.
function byteToLowerCaseChar(c) {
  return String.fromCharCode(c >= 0x41 && c <= 0x5A ? c + 0x20 : c);
}
 | 
						|
 | 
						|
/**
 * Extracts a character encoding from the value of a `<meta content="…">`
 * attribute, e.g. `"text/html; charset=utf-8"`.
 *
 * The string is searched (case-insensitively) for `charset`, optionally
 * followed by whitespace, then `=`, then optional whitespace. The label that
 * follows is either a quoted string or the run of characters up to the first
 * ASCII whitespace or `;`. The label is resolved with
 * `whatwgEncoding.labelToName`.
 *
 * @param {string} string - The attribute value to inspect.
 * @returns {?string} Whatever `whatwgEncoding.labelToName` yields for the
 *   label, or `null` when there is no `charset=` declaration, the label is
 *   missing, or an opening quote is never closed.
 */
function extractCharacterEncodingFromMeta(string) {
  let position = 0;

  while (true) {
    // `search` works on a substring, so its result is relative to `position`
    // and must be rebased before being used as an index into `string`.
    const relativeIndex = string.substring(position).search(/charset/i);
    if (relativeIndex === -1) {
      return null;
    }

    let subPosition = position + relativeIndex + "charset".length;

    // `charCodeAt` yields NaN past the end of the string, which is simply
    // "not whitespace", so running off the end cannot throw.
    while (isSpaceCharacter(string.charCodeAt(subPosition))) {
      ++subPosition;
    }

    if (string[subPosition] !== "=") {
      // Not a declaration; look for the next "charset" from just before the
      // character that broke the match.
      position = subPosition - 1;
      continue;
    }

    ++subPosition;

    while (isSpaceCharacter(string.charCodeAt(subPosition))) {
      ++subPosition;
    }

    position = subPosition;
    break;
  }

  const first = string[position];

  if (first === "\"" || first === "'") {
    const closingIndex = string.indexOf(first, position + 1);
    if (closingIndex === -1) {
      // It is an unmatched quotation mark
      return null;
    }
    return whatwgEncoding.labelToName(string.substring(position + 1, closingIndex));
  }

  // Nothing after "=", or only a single character: no label to resolve.
  if (position + 1 >= string.length) {
    return null;
  }

  // The terminator search is relative to `position + 1`; rebase it too.
  const relativeEnd = string.substring(position + 1).search(/\x09|\x0A|\x0C|\x0D|\x20|;/);
  const end = relativeEnd === -1 ? string.length : position + 1 + relativeEnd;

  return whatwgEncoding.labelToName(string.substring(position, end));
}
 | 
						|
 | 
						|
/**
 * Tells whether a numeric byte / char code is one of the five codes this
 * file treats as whitespace: 0x09, 0x0A, 0x0C, 0x0D or 0x20.
 *
 * @param {number} c - The code to test.
 * @returns {boolean} `true` for one of the five codes, `false` for anything
 *   else (including `undefined` and `NaN`).
 */
function isSpaceCharacter(c) {
  switch (c) {
    case 0x09: // TAB
    case 0x0A: // LF
    case 0x0C: // FF
    case 0x0D: // CR
    case 0x20: // SPACE
      return true;
    default:
      return false;
  }
}
 |