#include <algorithm>
#include <climits>
#include <cstdlib>
#include <cwchar>
#include <fstream>
#include <iostream>
#include <conio.h>
#include <Windows.h>
#include "scan.h"
using namespace std;
7+
8+ bool Scan::ListDirectoryContents (const wchar_t *sDir )
9+ {
10+ WIN32_FIND_DATA fdFile;
11+ HANDLE hFind = NULL ;
12+
13+ wchar_t sPath [2048 ];
14+
15+ // Specify a file mask. *.* = We want everything!
16+ wsprintf (sPath , L" %s\\ *.*" , sDir );
17+
18+ if ((hFind = FindFirstFile (sPath , &fdFile)) == INVALID_HANDLE_VALUE)
19+ {
20+ wprintf (L" Path not found: [%s]\n " , sDir );
21+ return false ;
22+ }
23+
24+ do
25+ {
26+ // Find first file will always return "."
27+ // and ".." as the first two directories.
28+ if (wcscmp (fdFile.cFileName , L" ." ) != 0
29+ && wcscmp (fdFile.cFileName , L" .." ) != 0 )
30+ {
31+ // Build up our file path using the passed in
32+ // [sDir] and the file/foldername we just found:
33+ wsprintf (sPath , L" %s\\ %s" , sDir , fdFile.cFileName );
34+
35+ // Is the entity a File or Folder?
36+ if (fdFile.dwFileAttributes &FILE_ATTRIBUTE_DIRECTORY)
37+ {
38+ wprintf (L" Directory: %s\n " , sPath );
39+ ListDirectoryContents (sPath ); // Recursion, I love it!
40+
41+ }
42+ else {
43+ process (sPath );
44+ }
45+ }
46+ } while (FindNextFile (hFind, &fdFile)); // Find the next file.
47+
48+ FindClose (hFind); // Always, Always, clean things up!
49+
50+ return true ;
51+ }
52+
53+
54+ int Process::process (wchar_t * file)
55+ {
56+ wcout << " \n Processing: " << file << endl;
57+ double size = file_size_calculator (file);
58+ cout << " \n\n Size of the file :" << size << " \n " ;
59+ return 0 ;
60+ }
61+
62+ int Process::file_size_calculator (wchar_t * file)
63+ {
64+ wcout << " \n Computing the size of " << file<<endl;
65+ ifstream in;
66+ in.open (file, ifstream::ate | ifstream::binary);
67+ double size = in.tellg ();
68+ bool duplicate_size = file_duplication_detector (size, file);
69+ if (duplicate_size == true )cout << " Files with duplicate sizes have been found" << endl;
70+ return size;
71+ }
72+
73+
74+ // This memeber function is used to detect files with same sizes
75+ // Note files with same sizes even in bytes is not said to be duplicates!
76+ // It needs to be processed further
77+ bool Process::file_duplication_detector (double size, wchar_t * file)
78+ {
79+ map<double , wchar_t *>::iterator itr;
80+ itr = fileduplicates.find (size);
81+ if (itr != fileduplicates.end ())
82+ {
83+ // This will create a list of files with same sizes
84+ list_of_duplicates.push_back (itr->second );
85+ list_of_duplicates.push_back (file);
86+ return true ;
87+ }
88+ else
89+ {
90+ fileduplicates[size] = file;
91+ }
92+ return false ;
93+ }
94+
95+ void Process::hasher ()
96+ {
97+ // Half open and closed iterator implementation!
98+ // I know there are other ways to do this
99+ // But half open and closed method is standard for almost all STL stuffs like vector,deque etc.,
100+ list<wchar_t *>::iterator itr1 = list_of_duplicates.begin ();
101+ list<wchar_t *>::iterator itr2 = list_of_duplicates.end ();
102+ // Common iterator
103+ // I've heard from someone ++something is faster than something++ for STL iterators
104+ // I've forgetten why :) you may have the answer for it if yes drop some comments please!
105+ map<string, wchar_t *>::iterator dupe;
106+ for (list<wchar_t *>::iterator itr = itr1; itr != itr2; ++itr)
107+ {
108+ char hash[MAX_PATH];
109+ wcstombs (hash, *itr, MAX_PATH);
110+ string md5 = generate_digest (hash);
111+ // Create an iterator for map
112+ dupe = duplicates.find (md5);
113+ if (dupe != duplicates.end ())
114+ {
115+ entries.push_back (dupe->second );
116+ entries.push_back (*itr);
117+ }
118+ else
119+ {
120+ duplicates[md5] = *itr;
121+ }
122+ }
123+ }
124+
125+ void Process::display ()
126+ {
127+ list<wchar_t *>::iterator itr1 = entries.begin ();
128+ list<wchar_t *>::iterator itr2 = entries.end ();
129+ for (list<wchar_t *>::iterator itr = itr1; itr != itr2; ++itr)
130+ {
131+ cout << " \n " ;
132+ wcout << " => Duplicates" << *itr << " \n " ;
133+ }
134+ }
0 commit comments