Media filename handling improvements (#1140)

* Derive content ID from hash+filename but preserve dedupe, improve Content-Disposition handling and ASCII handling

* Linter fix

* Some more comments

* Update sytest-whitelist
This commit is contained in:
Neil Alexander 2020-06-17 11:53:26 +01:00 committed by GitHub
parent a66a3b830c
commit 5d5aa0a31d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 80 additions and 13 deletions

View File

@ -28,6 +28,7 @@ import (
"strconv"
"strings"
"sync"
"unicode"
"github.com/matrix-org/dendrite/clientapi/jsonerror"
"github.com/matrix-org/dendrite/internal/config"
@ -54,6 +55,7 @@ type downloadRequest struct {
IsThumbnailRequest bool
ThumbnailSize types.ThumbnailSize
Logger *log.Entry
DownloadFilename string
}
// Download implements GET /download and GET /thumbnail
@ -73,6 +75,7 @@ func Download(
activeRemoteRequests *types.ActiveRemoteRequests,
activeThumbnailGeneration *types.ActiveThumbnailGeneration,
isThumbnailRequest bool,
customFilename string,
) {
dReq := &downloadRequest{
MediaMetadata: &types.MediaMetadata{
@ -84,6 +87,7 @@ func Download(
"Origin": origin,
"MediaID": mediaID,
}),
DownloadFilename: customFilename,
}
if dReq.IsThumbnailRequest {
@ -301,16 +305,8 @@ func (r *downloadRequest) respondFromLocalFile(
}).Info("Responding with file")
responseFile = file
responseMetadata = r.MediaMetadata
if len(responseMetadata.UploadName) > 0 {
uploadName, err := url.PathUnescape(string(responseMetadata.UploadName))
if err != nil {
return nil, fmt.Errorf("url.PathUnescape: %w", err)
}
w.Header().Set("Content-Disposition", fmt.Sprintf(
`inline; filename=utf-8"%s"`,
strings.ReplaceAll(uploadName, `"`, `\"`), // escape quote marks only, as per RFC6266
))
if err := r.addDownloadFilenameToHeaders(w, responseMetadata); err != nil {
return nil, err
}
}
@ -329,6 +325,67 @@ func (r *downloadRequest) respondFromLocalFile(
return responseMetadata, nil
}
func (r *downloadRequest) addDownloadFilenameToHeaders(
w http.ResponseWriter,
responseMetadata *types.MediaMetadata,
) error {
// If the requestor supplied a filename to name the download then
// use that, otherwise use the filename from the response metadata.
filename := string(responseMetadata.UploadName)
if r.DownloadFilename != "" {
filename = r.DownloadFilename
}
if len(filename) == 0 {
return nil
}
unescaped, err := url.PathUnescape(filename)
if err != nil {
return fmt.Errorf("url.PathUnescape: %w", err)
}
isASCII := true // Is the string ASCII or UTF-8?
quote := `` // Encloses the string (ASCII only)
for i := 0; i < len(unescaped); i++ {
if unescaped[i] > unicode.MaxASCII {
isASCII = false
}
if unescaped[i] == 0x20 || unescaped[i] == 0x3B {
// If the filename contains a space or a semicolon, which
// are special characters in Content-Disposition
quote = `"`
}
}
// We don't necessarily want a full escape as the Content-Disposition
// can take many of the characters that PathEscape would otherwise and
// browser support for encoding is a bit wild, so we'll escape only
// the characters that we know will mess up the parsing of the
// Content-Disposition header elements themselves
unescaped = strings.ReplaceAll(unescaped, `\`, `\\"`)
unescaped = strings.ReplaceAll(unescaped, `"`, `\"`)
if isASCII {
// For ASCII filenames, we should only quote the filename if
// it needs to be done, e.g. it contains a space or a character
// that would otherwise be parsed as a control character in the
// Content-Disposition header
w.Header().Set("Content-Disposition", fmt.Sprintf(
`inline; filename=%s%s%s`,
quote, unescaped, quote,
))
} else {
// For UTF-8 filenames, we quote always, as that's the standard
w.Header().Set("Content-Disposition", fmt.Sprintf(
`inline; filename=utf-8"%s"`,
unescaped,
))
}
return nil
}
// Note: Thumbnail generation may be ongoing asynchronously.
// If no thumbnail was found then returns nil, nil, nil
func (r *downloadRequest) getThumbnailFile(

View File

@ -66,7 +66,9 @@ func Setup(
downloadHandler := makeDownloadAPI("download", cfg, db, client, activeRemoteRequests, activeThumbnailGeneration)
r0mux.Handle("/download/{serverName}/{mediaId}", downloadHandler).Methods(http.MethodGet, http.MethodOptions)
v1mux.Handle("/download/{serverName}/{mediaId}", downloadHandler).Methods(http.MethodGet, http.MethodOptions) // TODO: remove when synapse is fixed
r0mux.Handle("/download/{serverName}/{mediaId}/{downloadName}", downloadHandler).Methods(http.MethodGet, http.MethodOptions)
v1mux.Handle("/download/{serverName}/{mediaId}", downloadHandler).Methods(http.MethodGet, http.MethodOptions) // TODO: remove when synapse is fixed
v1mux.Handle("/download/{serverName}/{mediaId}/{downloadName}", downloadHandler).Methods(http.MethodGet, http.MethodOptions) // TODO: remove when synapse is fixed
r0mux.Handle("/thumbnail/{serverName}/{mediaId}",
makeDownloadAPI("thumbnail", cfg, db, client, activeRemoteRequests, activeThumbnailGeneration),
@ -120,6 +122,7 @@ func makeDownloadAPI(
activeRemoteRequests,
activeThumbnailGeneration,
name == "thumbnail",
vars["downloadName"],
)
}
return promhttp.InstrumentHandlerCounter(counterVec, http.HandlerFunc(httpHandler))

View File

@ -16,6 +16,7 @@ package routing
import (
"context"
"encoding/base64"
"fmt"
"io"
"net/http"
@ -123,7 +124,9 @@ func (r *uploadRequest) doUpload(
r.MediaMetadata.FileSizeBytes = bytesWritten
r.MediaMetadata.Base64Hash = hash
r.MediaMetadata.MediaID = types.MediaID(hash)
r.MediaMetadata.MediaID = types.MediaID(base64.RawURLEncoding.EncodeToString(
[]byte(string(r.MediaMetadata.UploadName) + string(r.MediaMetadata.Base64Hash)),
))
r.Logger = r.Logger.WithField("MediaID", r.MediaMetadata.MediaID)

View File

@ -128,7 +128,7 @@ Outbound federation can send events
# test for now.
#Backfill checks the events requested belong to the room
Can upload without a file name
#Can download without a file name locally
Can download without a file name locally
Can upload with ASCII file name
Can send image in room message
AS cannot create users outside its own namespace
@ -327,3 +327,7 @@ Can upload with Unicode file name
POSTed media can be thumbnailed
Remote media can be thumbnailed
Can download with Unicode file name locally
Can download file 'ascii'
Can download file 'name with spaces'
Can download file 'name;with;semicolons'
Can download specifying a different ASCII file name