Fixing garbled Chinese file names when uploading files with HttpClient
- 2021-10-15 10:42:01
- OfStack
Phenomenon
When a file is uploaded with HttpClient and its name contains Chinese characters, the file name comes out garbled.
Code that produces the garbled file name:
private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,File fileToUpload) {
MultipartEntityBuilder builder = MultipartEntityBuilder.create();
builder.addTextBody("scenarioId", scenarioId.toString());
for (String groupId : groupIds) {
builder.addTextBody("groupIds", groupId);
}
builder.addTextBody("extension", extension);
builder.addPart("fileToUpload", new FileBody(fileToUpload));
builder.addTextBody("type", AssetFileTypeEnum.CSV.getName());
builder.addTextBody("isSplit", "false");
builder.addTextBody("isRefresh", "false");
return builder.build();
Cause of garbled code:
When HttpClient uploads a file, it calls the doWriteTo method to write the request body to an output stream. doWriteTo in turn calls formatMultipartHeader, which has three different underlying implementations, and each of the three uses a different character set.
doWriteTo method in HttpClient:
/**
 * Writes all body parts to the given stream, each one framed by the boundary.
 *
 * For every part this emits: two dashes, the boundary, CRLF, the part's
 * header fields (via formatMultipartHeader), CRLF, the part body (only when
 * writeContent is true), and CRLF. After the last part it emits the closing
 * delimiter: two dashes, the boundary, two dashes, CRLF.
 *
 * @param out          stream the multipart content is written to
 * @param writeContent whether each part's body is written in addition to its headers
 * @throws IOException declared by this method; the writes target {@code out}
 */
void doWriteTo(
final OutputStream out,
final boolean writeContent) throws IOException {
// The boundary is encoded with the charset configured on this form.
final ByteArrayBuffer boundaryEncoded = encode(this.charset, this.boundary);
for (final FormBodyPart part: getBodyParts()) {
writeBytes(TWO_DASHES, out);
writeBytes(boundaryEncoded, out);
writeBytes(CR_LF, out);
// formatMultipartHeader has three different implementations, one per mode, and each implementation uses a different character set
formatMultipartHeader(part, out);
writeBytes(CR_LF, out);
if (writeContent) {
part.getBody().writeTo(out);
}
writeBytes(CR_LF, out);
}
// Closing delimiter: "--" + boundary + "--" + CRLF
writeBytes(TWO_DASHES, out);
writeBytes(boundaryEncoded, out);
writeBytes(TWO_DASHES, out);
writeBytes(CR_LF, out);
}
The formatMultipartHeader method has a different implementation for each multipart mode.
MultipartEntityBuilder
/**
 * Builds the multipart form entity from the builder's current settings.
 *
 * Resolves the boundary (explicit value, else the "boundary" parameter of
 * contentType, else a generated one), resolves the charset (explicit value,
 * else the charset of contentType), assembles the Content-Type, copies the
 * body parts, and finally picks the form implementation that matches the mode.
 *
 * @return a MultipartFormEntity wrapping the selected form
 */
MultipartFormEntity buildEntity() {
String boundaryCopy = boundary;
if (boundaryCopy == null && contentType != null) {
boundaryCopy = contentType.getParameter("boundary");
}
if (boundaryCopy == null) {
boundaryCopy = generateBoundary();
}
Charset charsetCopy = charset;
if (charsetCopy == null && contentType != null) {
charsetCopy = contentType.getCharset();
}
// Content-Type parameters: always the boundary, plus the charset when one is known.
final List<NameValuePair> paramsList = new ArrayList<NameValuePair>(2);
paramsList.add(new BasicNameValuePair("boundary", boundaryCopy));
if (charsetCopy != null) {
paramsList.add(new BasicNameValuePair("charset", charsetCopy.name()));
}
final NameValuePair[] params = paramsList.toArray(new NameValuePair[paramsList.size()]);
final ContentType contentTypeCopy = contentType != null ?
contentType.withParameters(params) :
ContentType.create("multipart/" + DEFAULT_SUBTYPE, params);
final List<FormBodyPart> bodyPartsCopy = bodyParts != null ? new ArrayList<FormBodyPart>(bodyParts) :
Collections.<FormBodyPart>emptyList();
// mode is copied into modeCopy here; when no mode was set, it falls back to STRICT
final HttpMultipartMode modeCopy = mode != null ? mode : HttpMultipartMode.STRICT;
final AbstractMultipartForm form;
// A different form implementation is built depending on modeCopy. The three forms use different character sets for the part headers, which is the root cause of the garbled file name
switch (modeCopy) {
case BROWSER_COMPATIBLE:
form = new HttpBrowserCompatibleMultipart(charsetCopy, boundaryCopy, bodyPartsCopy);
break;
case RFC6532:
form = new HttpRFC6532Multipart(charsetCopy, boundaryCopy, bodyPartsCopy);
break;
default:
form = new HttpStrictMultipart(charsetCopy, boundaryCopy, bodyPartsCopy);
}
return new MultipartFormEntity(form, contentTypeCopy, form.getTotalLength());
}
/**
 * Builds the entity by delegating to buildEntity().
 *
 * @return the entity returned by buildEntity()
 */
public HttpEntity build() {
return buildEntity();
}
formatMultipartHeader Method in BROWSER_COMPATIBLE Mode
/**
 * Multipart form used for BROWSER_COMPATIBLE mode.
 *
 * Part headers are written with the charset passed to the constructor, so the
 * encoding of the file name follows whatever charset the caller configured.
 */
class HttpBrowserCompatibleMultipart extends AbstractMultipartForm {
// Body parts of this form, returned as-is by getBodyParts().
private final List<FormBodyPart> parts;
public HttpBrowserCompatibleMultipart(
final Charset charset,
final String boundary,
final List<FormBodyPart> parts) {
super(charset, boundary);
this.parts = parts;
}
@Override
public List<FormBodyPart> getBodyParts() {
return this.parts;
}
/**
* Write the multipart header fields; depends on the style.
*/
@Override
protected void formatMultipartHeader(
final FormBodyPart part,
final OutputStream out) throws IOException {
// For browser-compatible, only write Content-Disposition
// Use content charset
final Header header = part.getHeader();
final MinimalField cd = header.getField(MIME.CONTENT_DISPOSITION);
// The charset used here is the one configured on the form (this.charset)
writeField(cd, this.charset, out);
final String filename = part.getBody().getFilename();
if (filename != null) {
// Content-Type is written only for parts whose body has a file name
final MinimalField ct = header.getField(MIME.CONTENT_TYPE);
// Again the configured charset (this.charset) is used
writeField(ct, this.charset, out);
}
}
}
formatMultipartHeader Method in RFC6532 Mode
/**
 * Multipart form used for RFC6532 mode.
 *
 * Every header field of every part is written with MIME.UTF8_CHARSET; the
 * charset passed to the constructor is not used when writing headers.
 */
class HttpRFC6532Multipart extends AbstractMultipartForm {
// Body parts of this form, returned as-is by getBodyParts().
private final List<FormBodyPart> parts;
public HttpRFC6532Multipart(
final Charset charset,
final String boundary,
final List<FormBodyPart> parts) {
super(charset, boundary);
this.parts = parts;
}
@Override
public List<FormBodyPart> getBodyParts() {
return this.parts;
}
/**
 * Writes all header fields of the part using UTF-8.
 */
@Override
protected void formatMultipartHeader(
final FormBodyPart part,
final OutputStream out) throws IOException {
// For RFC6532, we output all fields with UTF-8 encoding.
final Header header = part.getHeader();
for (final MinimalField field: header) {
// The charset here is always UTF-8 (MIME.UTF8_CHARSET), independent of this.charset
writeField(field, MIME.UTF8_CHARSET, out);
}
}
}
formatMultipartHeader method in default mode
/**
 * Multipart form used for STRICT mode, which is also the fallback when no
 * mode is configured on the builder.
 *
 * Header fields are written through the writeField overload that takes no
 * charset argument.
 */
class HttpStrictMultipart extends AbstractMultipartForm {
// Body parts of this form, returned as-is by getBodyParts().
private final List<FormBodyPart> parts;
public HttpStrictMultipart(
final Charset charset,
final String boundary,
final List<FormBodyPart> parts) {
super(charset, boundary);
this.parts = parts;
}
@Override
public List<FormBodyPart> getBodyParts() {
return this.parts;
}
/**
 * Writes all header fields of the part without passing an explicit charset.
 */
@Override
protected void formatMultipartHeader(
final FormBodyPart part,
final OutputStream out) throws IOException {
// For strict, we output all fields with MIME-standard encoding.
// As the comment above indicates, the default charset is used here, i.e. US-ASCII (see DEFAULT_CHARSET in the MIME class) -- a charset that cannot represent Chinese characters
final Header header = part.getHeader();
for (final MinimalField field: header) {
writeField(field, out);
}
}
}
Class MIME
/**
 * Constants for MIME header field names, transfer-encoding values, and the
 * charsets used when writing multipart headers.
 */
public final class MIME {
// Header field names
public static final String CONTENT_TYPE = "Content-Type";
public static final String CONTENT_TRANSFER_ENC = "Content-Transfer-Encoding";
public static final String CONTENT_DISPOSITION = "Content-Disposition";
// Transfer-encoding values
public static final String ENC_8BIT = "8bit";
public static final String ENC_BINARY = "binary";
/** The default character set to be used, i.e. "US-ASCII" */
public static final Charset DEFAULT_CHARSET = Consts.ASCII;
/** UTF-8 is used for RFC6532 */
public static final Charset UTF8_CHARSET = Consts.UTF_8;
}
Solution
Once the root cause of the garbling is known, the problem is easy to solve. There are two ways to fix it:
Set mode to: BROWSER_COMPATIBLE and set the character set to UTF8
/**
 * Builds the multipart request body for the upload, using BROWSER_COMPATIBLE
 * mode with an explicit UTF-8 charset so that part headers (including the
 * file name) are written in UTF-8.
 *
 * @param scenarioId   id sent as the "scenarioId" text part
 * @param groupIds     ids sent as repeated "groupIds" text parts, one per element
 * @param extension    value sent as the "extension" text part
 * @param fileToUpload file sent as the "fileToUpload" part
 * @return the multipart entity produced by the builder
 */
private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,
        File fileToUpload) {
    MultipartEntityBuilder builder = MultipartEntityBuilder.create();
    // BROWSER_COMPATIBLE writes part headers with the builder's charset, so both must be set.
    builder.setMode(HttpMultipartMode.BROWSER_COMPATIBLE);
    // Use the JDK constant instead of a by-name lookup; fully qualified so no new import is needed.
    builder.setCharset(java.nio.charset.StandardCharsets.UTF_8);
    builder.addTextBody("scenarioId", scenarioId.toString());
    for (String groupId : groupIds) {
        builder.addTextBody("groupIds", groupId);
    }
    builder.addTextBody("extension", extension);
    builder.addPart("fileToUpload", new FileBody(fileToUpload));
    builder.addTextBody("type", AssetFileTypeEnum.CSV.getName());
    builder.addTextBody("isSplit", "false");
    builder.addTextBody("isRefresh", "false");
    return builder.build();
}
Set the mode to: RFC6532
/**
 * Builds the multipart request body for the upload in RFC6532 mode.
 *
 * <p>No charset is set on the builder here; only the mode is changed.
 *
 * @param scenarioId   id sent as the "scenarioId" text part
 * @param groupIds     ids sent as repeated "groupIds" text parts, one per element
 * @param extension    value sent as the "extension" text part
 * @param fileToUpload file sent as the "fileToUpload" part
 * @return the multipart entity produced by the builder
 */
private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,
        File fileToUpload) {
    // Mode first, then the parts in the same order as before.
    final MultipartEntityBuilder multipart = MultipartEntityBuilder.create()
            .setMode(HttpMultipartMode.RFC6532)
            .addTextBody("scenarioId", scenarioId.toString());

    // One "groupIds" part per id.
    for (final String id : groupIds) {
        multipart.addTextBody("groupIds", id);
    }

    return multipart
            .addTextBody("extension", extension)
            .addPart("fileToUpload", new FileBody(fileToUpload))
            .addTextBody("type", AssetFileTypeEnum.CSV.getName())
            .addTextBody("isSplit", "false")
            .addTextBody("isRefresh", "false")
            .build();
}