Summarize and analyze the method of traversing the directory tree under Linux

  • 2020-04-02 01:04:42
  • OfStack

A few days ago need to realize the traversal of the entire directory tree, looked up some relevant information. The original approach was to use the readdir() and lstat() functions for recursive traversal, but it turned out that Linux had a pretty good interface for the most common operation of directory traversal: With NFTW FTW () () . The following two methods are described in detail.
1. Implement recursion manually
1.1 stat() family of functions
The stat family of functions includes the stat,fstat, and lstat functions, all of which return attribute information (metadata) to the file to the user.

view plaincopy to clipboardprint?
#include <sys/stat.h>   
       int stat(const char*pathname,struct stat*buf);   
       int fstat(int filedes,struct stat*buf);   
       int lstat(const char *pathname,struct stat*buf);  
 #include <sys/stat.h>
        int stat(const char*pathname,struct stat*buf);
        int fstat(int filedes,struct stat*buf);
        int lstat(const char *pathname,struct stat*buf); 

Return of three functions: If the success is 0, the error is -1. For a pathname, the stat function returns an information structure related to the named file, and the fstat function gets information about the file that has been opened on the descriptor filedes. The lstat function is similar to stat, but when the named file is a symbolic connection, lstat returns information about the symbolic connection, not about the file referenced by the symbolic connection.
The second argument is a pointer to a structure that we should provide. These functions fill in the structures pointed to by buf. The actual definition of the structure may be implemented differently, but its basic form is:

      struct stat{
        mode st_mode; 
        ino st_ino;
        dev st_dev;
        dev st_rdev;
        nlink st_nlink;
        uid st_uid;
        gid st_gid;
        off st_size;
        time st_atime;
        time st_mtime;
        time st_ctime;
        long st_blksize;/* The best I/O Block length */
        long st_blocks;/* The distribution of 512 Number of byte blocks 
        };

Here is a simple test

view plaincopy to clipboardprint?
#include<unistd.h>   
#include<sys/stat.h>   
#include<stdio.h>   
int  
main(int argc, char **argv){   
  struct stat buf;   
  if(stat(argv[1],&buf)) {   
    printf("[stat]:error!/n");   
    return -1;   
  }   
  printf("st_dev:%d/n",buf.st_dev);   
  printf("st_ino:%d/n",buf.st_ino);   
  printf("st_mode:%d S_ISDIR:%d/n",buf.st_mode,S_ISDIR(buf.st_mode));   
  printf("st_nlink:%d/n",buf.st_nlink);   
  printf("st_uid:%d/n",buf.st_uid);   
  printf("st_gid:%d/n",buf.st_gid);   
  printf("st_rdev:%d/n",buf.st_rdev);   
  printf("st_size:%d/n",buf.st_size);   
  printf("st_blksize:%lu/n",buf.st_blksize);   
  printf("st_blocks:%lu/n",buf.st_blocks);   
  printf("st_atime:%ld/n",buf.st_atime);   
  printf("st_mtime:%ld/n",buf.st_mtime);   
  printf("st_ctime:%ld/n",buf.st_ctime);   
  return 0;   
}  
#include<unistd.h>
#include<sys/stat.h>
#include<stdio.h>
int
main(int argc, char **argv){
  struct stat buf;
  if(stat(argv[1],&buf)) {
    printf("[stat]:error!/n");
    return -1;
  }
  printf("st_dev:%d/n",buf.st_dev);
  printf("st_ino:%d/n",buf.st_ino);
  printf("st_mode:%d S_ISDIR:%d/n",buf.st_mode,S_ISDIR(buf.st_mode));
  printf("st_nlink:%d/n",buf.st_nlink);
  printf("st_uid:%d/n",buf.st_uid);
  printf("st_gid:%d/n",buf.st_gid);
  printf("st_rdev:%d/n",buf.st_rdev);
  printf("st_size:%d/n",buf.st_size);
  printf("st_blksize:%lu/n",buf.st_blksize);
  printf("st_blocks:%lu/n",buf.st_blocks);
  printf("st_atime:%ld/n",buf.st_atime);
  printf("st_mtime:%ld/n",buf.st_mtime);
  printf("st_ctime:%ld/n",buf.st_ctime);
  return 0;
} 

Here's a footnote to the basic types of files in Linux.
Regular file. This is the most common type of file that contains some form of data. Whether the data is text or binary makes no difference to the system core. The normal file content is interpreted by the application that processes the file.
2. Directory file. This file contains the names of other files and Pointers to information about those files. Any process with read permissions to a directory file can read the contents of that directory, but only the system kernel can write to the directory file.
3. Charocter special file. This file is used for certain types of devices in the system.
4. Block special file. This file is typically used on disk devices. All devices in the system are either character-specific files or block-specific files.
5. FIFO. This file is used for inter-process communication and is sometimes referred to as a named pipe.
6. Socket. This file is used for network communication between processes. Sleeve interfaces can also be used for non-network communication between processes on a host machine.
7. Symboliclink. This file points to another file.
For file types, you can test st_mode with defined macros such as S_ISDIR() to determine the file type. Macros are S_ISREG, S_ISDIR, S_ISCHR, S_ISBLK, S_ISFIFO, S_ISLNK, S_ISSOCK.
1.2 walk through the directory example
Citing an example of someone else, and now a lot of file processing function together to use, application traversal of the specified directory files, as well as into the lower traverse subdirectory, it is transfer directories recursively to the opendir, it should be pointed out that, it's decided the subdirectories if nested too deeply, program will return failure, because the number of subdirectories flow to allow open there is a cap.

view plaincopy to clipboardprint?
  
#include <unistd.h>   
#include <stdio.h>   
#include <dirent.h>   
#include <string.h>   
#include <sys/stat.h>   
void printdir(char *dir, int depth)   
{   
    DIR *dp;   
    struct dirent *entry;   
    struct stat statbuf;   
    if((dp = opendir(dir)) == NULL) {   
        fprintf(stderr,"cannot open directory: %s/n", dir);   
        return;   
    }   
    chdir(dir);   
    while((entry = readdir(dp)) != NULL) {   
        lstat(entry->d_name,&statbuf);   
        if(S_ISDIR(statbuf.st_mode)) {   
            /**/  
            if(strcmp(".",entry->d_name) == 0 ||    
                strcmp("..",entry->d_name) == 0)   
                continue;   
            printf("%*s%s//n",depth,"",entry->d_name);   
            /**/  
            printdir(entry->d_name,depth+4);   
        }   
        else printf("%*s%s/n",depth,"",entry->d_name);   
    }   
    chdir("..");   
    closedir(dp);   
}   
/**/  
int main(int argc, char* argv[])   
{   
    char *topdir, pwd[2]=".";   
    if (argc != 2)   
        topdir=pwd;   
    else  
        topdir=argv[1];   
    printf("Directory scan of %s/n",topdir);   
    printdir(topdir,0);   
    printf("done./n");   
    exit(0);   
}  

#include <unistd.h>
#include <stdio.h>
#include <dirent.h>
#include <string.h>
#include <sys/stat.h>
void printdir(char *dir, int depth)
{
    DIR *dp;
    struct dirent *entry;
    struct stat statbuf;
    if((dp = opendir(dir)) == NULL) {
        fprintf(stderr,"cannot open directory: %s/n", dir);
        return;
    }
    chdir(dir);
    while((entry = readdir(dp)) != NULL) {
        lstat(entry->d_name,&statbuf);
        if(S_ISDIR(statbuf.st_mode)) {
            /**/
            if(strcmp(".",entry->d_name) == 0 || 
                strcmp("..",entry->d_name) == 0)
                continue;
            printf("%*s%s//n",depth,"",entry->d_name);
            /**/
            printdir(entry->d_name,depth+4);
        }
        else printf("%*s%s/n",depth,"",entry->d_name);
    }
    chdir("..");
    closedir(dp);
}
/**/
int main(int argc, char* argv[])
{
    char *topdir, pwd[2]=".";
    if (argc != 2)
        topdir=pwd;
    else
        topdir=argv[1];
    printf("Directory scan of %s/n",topdir);
    printdir(topdir,0);
    printf("done./n");
    exit(0);
} 

2. Traverse the directory using FTW calls
2.1 FTW family of functions
The method of recursive traversal of the directory tree using readdir function is relatively primitive. Glibc2.1 includes FTW and other functions, which can facilitate the traversal of the directory tree.

view plaincopy to clipboardprint?
#include <ftw.h>   
int ftw(const char *dirpath,   
        int (*fn) (const char *fpath, const struct stat *sb,int typeflag),   
        int nopenfd);   
#define _XOPEN_SOURCE 500   
#include <ftw.h>   
int nftw(const char *dirpath,   
        int (*fn) (const char *fpath, const struct stat *sb,int typeflag, struct FTW *ftwbuf),   
        int nopenfd, int flags);  
#include <ftw.h>
int ftw(const char *dirpath,
        int (*fn) (const char *fpath, const struct stat *sb,int typeflag),
        int nopenfd);
#define _XOPEN_SOURCE 500
#include <ftw.h>
int nftw(const char *dirpath,
        int (*fn) (const char *fpath, const struct stat *sb,int typeflag, struct FTW *ftwbuf),
        int nopenfd, int flags); 

Please refer to the article" FTW, nftw-file tree walk ".
FTW ()
FTW () recursively traverses the subdirectory layer by layer, starting with the directory specified by the dirpath parameter.
FTW () will pass three parameters to fn(), the first parameter *fpath points to the directory path at that time, the second parameter is *sb, is the stat structure pointer, the third parameter is flag, There are several possible values
FTW_F               General file
FTW_D             directory
FTW_DNR       Unreadable directory. The following directory will not be traversed
FTW_SL             Symbolic link
FTW_NS             The inability to obtain the stat structure data may be a permission issue
The last parameter, depth, represents the number of files that FTW () opens while traversing the directory. FTW () requires at least one file descriptor at each level of the traversal, and if the traversal runs out of the number of restrictions given by the depth, the entire traversal will be slowed by the constant closing and opening of files. (the actual test did not find...)

To end the traversal of FTW (), fn() simply returns a non-zero value that will also be the return value of FTW (). Otherwise FTW () will try to walk through all the directories and return 0
Back to back   Value: The traversal interrupt returns the return value of the fn() function, the full traversal returns 0, or -1 if an error occurs
Additional notes: Since FTW () will dynamically configure memory usage, please interrupt the traversal in the normal way (the fn function returns a non-zero value) and do not use longjmp() in the fn function
NFTW ()
Function description:
NFTW (), much like FTW (), starts with the directory specified by the dirpath parameter and recursively traverses the subdirectory layer by layer. Each time you enter a directory, the function defined by the *fn parameter is called to handle it. NFTW () passes four parameters to fn(). The first parameter *fpath points to the directory path where it is located, the second parameter *sb is the stat structure pointer (refer to stat() for structure definition), and the third parameter is the typeflag, with the following possible values:
FTW_F                                                 General file
FTW_D                                                 directory
FTW_DNR                                           Unreadable directory. The following directory will not be traversed
FTW_SL                                                 Symbolic link
FTW_NS                                               The inability to obtain the stat structure data may be a permission issue
FTW_DP                                               Directory, and the subdirectories have been traversed
FTW_SLN                                             A symbolic connection, but a connection to a file that does not exist
The fourth parameter of fn() is the FTW structure, which is defined as follows:

struct  FTW
{
     int  base;
     int  level; //Level represents the depth of traversal
}

The third parameter, depth, represents the number of files that NFTW () can open while traversing the directory.
FTW () requires at least one file descriptor at each level of the traversal, and if the traversal runs out of the number of restrictions given by the depth, the entire traversal will be slowed by the constant closing and opening of files
The last NFTW () parameter, flags, is used to specify the traversal action, and the following actions can be specified OR combined with OR
FTW_CHDIR                                 Move to the directory with chdir() before reading it
FTW_DEPTH                               Perform a depth-first search. Traverse all subdirectories before traversing the directory
FTW_MOUNT                             Traversals do not cross to other file systems
FTW_PHYS                                   Do not traverse the directory of symbolic links. By default, the symbolic links directory is traversed
To end the traversal of NFTW (), fn() simply returns a non-zero value, which is also the return value of NFTW (). Otherwise NFTW () will try to go through all the directories and return 0.
Return back to value: The traversal interrupt returns the return value of the fn() function, returns 0 after all traversal, and returns -1 if any error occurs
The difference between: FTW calls the stat function for each file, which causes the program to follow the symbolic link. This can lead to repeating directories or looping statistics for directory files in some cases (this is because of symbolic links, see advanced programming in UNIX environments for details).
NFTW calls the lstat function so there is no problem following symbolic links.
Note: When using the NFTW function, you must define #define _XOPEN_SOURCE 500, otherwise errors such as undefined will occur.
One of the things I didn't get clear on was that when I used FTW_DEPTH to traverse the entire directory tree, there was an exception in the proc directory, and I might need to specify FTW_PHYS not to traverse the symlink directory.
2. Example of traversal
A small example of a test I wrote myself. Traversing the specified directory, output file metadata and traversal depth information.

view plaincopy to clipboardprint?
#define _XOPEN_SOURCE 500    
#include<ftw.h>   
#include<sys/stat.h>   
#include<unistd.h>   
#include<stdio.h>   
#include<string.h>    
#define FILEOPEN 1024    
int gb_filecount;   
int getMetadata(const char *dirpath, const struct stat *sb,int typeflag, struct FTW *ftwbuf);   
int main(int argc, char ** argv){   

  int ret = 0;   
  struct stat pathbuf;   
  if(argc > 2){   
    printf("-nfwt_t:invalid arguments /n ");   
    return -1;   
  }   
  if(stat(argv[1],&pathbuf)){   
    printf("-nfwt_t:invalid dirpath:%s/n",argv[1]);   
    return -1;   
  }else{   
    if(0 == S_ISDIR(pathbuf.st_mode)){   
      printf("-nfwt_t:/"%s/" is not dirpath/n",argv[1]);   
      return -1;   
    }   
  }   
  gb_filecount=0;   
  ret = nftw(argv[1],getMetadata,FILEOPEN,FTW_PHYS);   
    if(ret<0){   
    printf("-nftw:[wrong:%d]ntfw search %d files/n",ret,gb_filecount);   
    return -1;   
  }else{   
    printf("-nftw:[done:%d]trasvers in %s search %d files/n",ret,argv[1],gb_filecount);   
    return 0;   
  }   
}   
int    
getMetadata(const char *dirpath, const struct stat *sb,int typeflag, struct FTW *ftwbuf){   
  printf("num:%d path:%s ",++gb_filecount,dirpath);   
  printf("st_dev:%d ",(*sb).st_dev);   
  printf("st_ino:%d ",(*sb).st_ino);   
  printf("st_mode:%d S_ISDIR:%d ",(*sb).st_mode,S_ISDIR((*sb).st_mode));   
  printf("st_nlink:%d ",(*sb).st_nlink);   
  printf("st_uid:%d ",(*sb).st_uid);   
  printf("st_gid:%d ",(*sb).st_gid);   
  printf("st_rdev:%d ",(*sb).st_rdev);   
  printf("st_size:%d ",(*sb).st_size);   
  printf("st_blksize:%lu ",(*sb).st_blksize);   
  printf("st_blocks:%lu ",(*sb).st_blocks);   
  printf("st_atime:%ld ",(*sb).st_atime);   
  printf("st_mtime:%ld ",(*sb).st_mtime);   
  printf("st_ctime:%ld ",(*sb).st_ctime);   
  printf("typeflag:%d ",typeflag);   
  printf("FTW_base:%d FTW_level:%d /n",(*ftwbuf).base,(*ftwbuf).level);   
  return 0;   
}  


Related articles: