Source code for pyftle.file_utils

import os
from pathlib import Path
from typing import List

import pandas as pd


def find_files_with_pattern(root_dir: str, pattern: str) -> list[str]:
    """
    Recursively search for files whose names contain a given pattern.

    This function traverses the specified root directory and all of its
    subdirectories, returning every regular file whose name contains the
    given pattern. Directories whose names happen to match are not returned.

    Parameters
    ----------
    root_dir : str
        Path to the root directory where the search begins.
    pattern : str
        Substring pattern to match within filenames.

    Returns
    -------
    list[str]
        Sorted list of file paths (prefixed by ``root_dir``) that contain
        the given pattern.

    Notes
    -----
    The pattern is embedded in a glob expression (``*pattern*``), so glob
    metacharacters such as ``*``, ``?`` and ``[`` inside ``pattern`` are
    interpreted by :meth:`pathlib.Path.rglob` rather than matched literally.

    Examples
    --------
    >>> find_files_with_pattern("/data", "velocity")
    ['/data/run1/velocity_001.csv', '/data/run2/velocity_002.csv']
    """
    root_path = Path(root_dir)

    # rglob yields directories as well as files; keep only files so the
    # result matches the documented contract.
    return sorted(
        str(path) for path in root_path.rglob(f"*{pattern}*") if path.is_file()
    )
def write_list_to_txt(file_list: list[str], output_file: str) -> None:
    """
    Write a list of file paths to a text file, one path per line.

    Parameters
    ----------
    file_list : list[str]
        List of file paths to write.
    output_file : str
        Path to the output text file.

    Notes
    -----
    The output file is overwritten if it already exists. It is written as
    UTF-8 regardless of the platform locale, so paths containing non-ASCII
    characters are encoded the same way on every system.

    Examples
    --------
    >>> write_list_to_txt(["a.txt", "b.txt"], "files.txt")
    # Creates a file 'files.txt' with:
    # a.txt
    # b.txt
    """
    # Explicit encoding: the default for open() depends on the locale.
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(file_path + "\n" for file_path in file_list)
def get_files_list(file_path: str) -> List[str]:
    """
    Read a text file containing a list of file paths.

    This function expects a plain text file (e.g., generated by
    :func:`write_list_to_txt`) where each line contains a file path. It
    returns the list of paths as strings.

    Parameters
    ----------
    file_path : str
        Path to the text file containing the list of file paths.

    Returns
    -------
    list[str]
        List of file paths read from the file, in file order. Blank lines
        are skipped; an empty file yields an empty list.

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.

    Notes
    -----
    The file is read line by line as UTF-8 text rather than parsed as CSV,
    so commas, quotes and strings such as ``NA`` inside a path are returned
    unchanged.

    Examples
    --------
    >>> get_files_list("files.txt")
    ['a.txt', 'b.txt']
    """
    try:
        with open(file_path, encoding="utf-8") as f:
            # Drop only the line terminator; skip lines that are blank.
            return [line.rstrip("\n") for line in f if line.strip()]
    except FileNotFoundError as err:
        raise FileNotFoundError(f"File not found at {file_path}") from err