@@ -335,7 +335,7 @@ def detect_file_type(stream: bytes) -> object | None:
335335 return file_type
336336
337337
338- def normalise_file_name_with_ext (file_name : str , stream : bytes ) -> str :
338+ def normalise_file_name_with_ext (file_name : str , stream : bytes , file_type : object | None = None ) -> str :
339339 """Normalize filename and ensure an extension is present.
340340
341341 LibreOffice relies on a reasonable filename with an extension to select
@@ -346,6 +346,7 @@ def normalise_file_name_with_ext(file_name: str, stream: bytes) -> str:
346346 Args:
347347 file_name: Original file name (may be empty or extension-less).
348348 stream: File content used for extension inference.
349+ file_type: Optional previously detected file type descriptor.
349350
350351 Returns:
351352 str: Normalized file name with an extension.
@@ -361,20 +362,25 @@ def normalise_file_name_with_ext(file_name: str, stream: bytes) -> str:
361362 if ext :
362363 return base + ext
363364
364- # 2) let filetype guess it from content
365+ # 2) prefer an already detected extension when available
366+ detected_ext = getattr (file_type , "extension" , None )
367+ if detected_ext :
368+ return f"{ base } .{ str (detected_ext )} "
369+
370+ # 3) let filetype guess it from content
365371 guessed_ext = filetype .guess_extension (stream )
366372 if guessed_ext :
367373 return f"{ base } .{ guessed_ext } "
368374
369- # 3 ) fallbacks for texty formats our filetype may not catch
375+ # 4 ) fallbacks for texty formats our filetype may not catch
370376 if is_file_type_html (stream ):
371377 return base + ".html"
372378 if is_file_type_xml (stream ):
373379 return base + ".xml"
374380 if is_file_type_rtf (stream ):
375381 return base + ".rtf"
376382
377- # 4 ) only tag as plain text when the content actually looks like text
383+ # 5 ) only tag as plain text when the content actually looks like text
378384 if is_file_content_plain_text (stream ):
379385 return base + ".txt"
380386
0 commit comments