Source code for pyftle.file_utils
import os
from pathlib import Path
from typing import List
import pandas as pd
[docs]
def find_files_with_pattern(root_dir: str, pattern: str) -> list[str]:
    """
    Recursively search for files whose names contain a given pattern.

    The search walks ``root_dir`` and all of its subdirectories using
    ``Path.rglob(f"*{pattern}*")`` and keeps only regular files, so a
    directory whose name happens to contain the pattern is not returned.

    Parameters
    ----------
    root_dir : str
        Path to the root directory where the search begins.
    pattern : str
        Substring to match within filenames. It is inserted verbatim into
        a glob expression, so glob metacharacters (``*``, ``?``, ``[...]``)
        inside it are interpreted as wildcards rather than literally.

    Returns
    -------
    list[str]
        Sorted list of file paths (as strings, prefixed by ``root_dir``)
        whose names match the pattern.

    Examples
    --------
    >>> find_files_with_pattern("/data", "velocity")
    ['/data/run1/velocity_001.csv', '/data/run2/velocity_002.csv']
    """
    root_path = Path(root_dir)
    # rglob yields directories too; keep only actual files as documented.
    return sorted(
        str(candidate)
        for candidate in root_path.rglob(f"*{pattern}*")
        if candidate.is_file()
    )
[docs]
def write_list_to_txt(file_list: list[str], output_file: str) -> None:
    """
    Write a list of file paths to a text file, one path per line.

    Parameters
    ----------
    file_list : list[str]
        List of file paths to write.
    output_file : str
        Path to the output text file.

    Notes
    -----
    The output file is overwritten if it already exists. It is always
    encoded as UTF-8, independent of the platform locale, so that paths
    containing non-ASCII characters are written identically everywhere.

    Examples
    --------
    >>> write_list_to_txt(["a.txt", "b.txt"], "files.txt")
    # Creates a file 'files.txt' with:
    # a.txt
    # b.txt
    """
    # Explicit encoding: the default for open() depends on the locale,
    # which makes the output non-portable for non-ASCII paths.
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(file_path + "\n" for file_path in file_list)
[docs]
def get_files_list(file_path: str) -> List[str]:
    """
    Read a text file containing a list of file paths.

    This function expects a plain text file (e.g., generated by
    :func:`write_list_to_txt`) where each line contains a file path.
    Each line is returned verbatim (minus its line terminator), so paths
    containing commas, quotes, or tokens such as ``NA`` are preserved.
    Blank lines are skipped, and an empty file yields an empty list.

    Parameters
    ----------
    file_path : str
        Path to the text file containing the list of file paths.

    Returns
    -------
    list[str]
        List of file paths read from the file, in file order.

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.

    Examples
    --------
    >>> get_files_list("files.txt")
    ['a.txt', 'b.txt']
    """
    try:
        # "utf-8-sig" reads plain UTF-8 and also tolerates a leading BOM.
        with open(file_path, encoding="utf-8-sig") as listing:
            # Text mode normalises line endings to "\n"; strip only that so
            # any other whitespace that is part of a path is kept intact.
            entries = [line.rstrip("\n") for line in listing]
    except FileNotFoundError as err:
        raise FileNotFoundError(f"File not found at {file_path}") from err
    # Drop blank lines (e.g. a trailing empty line at the end of the file).
    return [entry for entry in entries if entry.strip()]