A couple of months ago, I posted about my experience with making a Python dependency graph.
Of course, as the post states, I was originally looking for a way to make a graph showing the relationship among PHP files in regard to "include" statements.
Well, I'm home sick, and after a few hours of trying to find an easy, out-of-the-box solution, I gave up and rolled my own Python script to generate a DOT graph file.
I didn't have anything better to do.

The results are pretty simplistic, but I'm happy enough with it for now.
The Python script takes three arguments: the directory in which the PHP files exist, whether to search recursively or not (0=no, 1=yes), and the name of the output file, like so:
$ python makeDOT.py blog/wordpress 1 wordpressIncludes.dot
#####
#importing modules
import glob, re, sys, os, fnmatch
#newline and tab strings used when emitting the DOT file
br, tab = "\n", "\t"
#####
#exiting if all 3 arguments are not passed via command line
def fail():
    """Report how many of the 3 required arguments were given, then exit.

    The message is written to stderr and the process exits with status 1,
    so a calling shell or script can tell that the run failed.

    Raises:
        SystemExit: always, with exit code 1.
    """
    provided = len(sys.argv) - 1  #argv[0] is the script name itself
    sys.stderr.write("ERROR: " + str(provided) + " of 3 required arguments provided.\n")
    sys.exit(1)
#####
#getting arguments passed via command line
#all three are required: root DIRECTORY, RECURSION flag, OUTPUT filename
#one explicit count check replaces three bare "except:" clauses, which
#would have caught any exception at all, not only a missing argument
if len(sys.argv) < 4:
    fail()
#root DIRECTORY string
myDir = sys.argv[1]
#RECURSION flag, kept as the string typed on the command line
myRec = sys.argv[2]
#OUTPUT filename string
myFile = sys.argv[3]
#####
#making list of PHP files within DIRECTORY
if myRec == "0": #without recursion
    PHP_list = glob.glob(myDir + "/*.php")
elif myRec == "1": #with recursion
    PHP_list = []
    for dirname, dirnames, filenames in os.walk(myDir):
        for filename in filenames:
            if fnmatch.fnmatch(filename, "*.php"):
                PHP_list.append(os.path.join(dirname, filename))
else:
    #any other value used to leave PHP_list undefined, which crashed
    #further down with a NameError; stop here with a clear message instead
    sys.exit("ERROR: recursion argument must be 0 or 1, got " + repr(myRec))
#collecting the include relationships:
#includeList holds [source, target] pairs, where source is a PHP file
#(path relative to DIRECTORY) and target is the PHP path named in one of
#its include/include_once/require/require_once calls
includeList = []
#a single pattern covers all four statements;
#group 1 captures the argument text found between parentheses
includeCall = re.compile(r".*(?:include|require).*\((.*)\)")
#a string literal in single or double quotes; group 2 is its content
quotedLiteral = re.compile(r"""(["'])(.*?)\1""")
for phpFile in PHP_list:
    #path of the including file relative to DIRECTORY, with forward slashes;
    #relpath stays correct when DIRECTORY is given with a trailing slash,
    #where slicing off len(myDir)+1 characters dropped the first letter
    source = os.path.relpath(phpFile, myDir).replace("\\", "/")
    #the with-block closes each file as soon as it has been scanned
    with open(phpFile, "r") as fileOpen:
        for line in fileOpen:
            m = includeCall.match(line)
            if not m:
                continue
            literals = quotedLiteral.findall(m.group(1))
            if not literals:
                continue
            #the included path is the last quoted literal of the argument,
            #so an expression such as SOME_DIR . 'x.php' yields "x.php"
            #rather than a garbled slice of the whole expression
            matchFile = literals[-1][1].replace("\\", "/")
            if matchFile.endswith(".php"): #only PHP files
                includeList.append([source, matchFile])
#####
#creating DOT file
#the with-block closes the file even if one of the writes raises
with open(myFile, "w") as dot:
    #writing to DOT file
    dot.write("digraph {" + br)
    #one edge statement per pair:  <tab>"source" -> "target";
    for a, b in includeList:
        dot.write(tab + "\"" + a + "\" -> \"" + b + "\";" + br)
    dot.write("}")
#####
#exiting
sys.exit()
I ran the Python script on the PHP scripts for MXMLiszt.
Then I used the "circo" layout engine in Graphviz – specifically the Gvedit.exe application – on this resultant DOT file.
Here's the result:
--------------
Hi, nice script. I made some improvements, maybe you want to make an update?
* removing duplicates
* working with absolute paths, so that the same file is identified even when it is referenced through different relative paths
* improved regular expressions (sometimes the first letter of a file was missing)
* simplified file-existence check (to respect PHP's simple fallback for includes: if the given path doesn't exist, the working directory is tried, followed by the directories in php.ini's include_path)
#####
#importing modules
import glob, re, sys, os, fnmatch
#newline and tab strings used for console and DOT output
br, tab = "\n", "\t"
#regex components
#matches any run of characters
anything = ".*"
#a single or a double quote character, as one capturing group
quotes = '("|\')'
#the four PHP statements that pull in another file, as one capturing group
dependency = "(" + "|".join(["require", "require_once", "include", "include_once"]) + ")"
#####
#exiting if all 3 arguments are not passed via command line
def fail():
    """Report how many of the 3 required arguments were given, then exit.

    The message is written to stderr and the process exits with status 1,
    so a calling shell or script can tell that the run failed.

    Raises:
        SystemExit: always, with exit code 1.
    """
    provided = len(sys.argv) - 1  #argv[0] is the script name itself
    sys.stderr.write("ERROR: " + str(provided) + " of 3 required arguments provided.\n")
    sys.exit(1)
#####
#getting arguments passed via command line
#all three are required: root DIRECTORY, RECURSION flag, OUTPUT filename
#one explicit count check replaces three bare "except:" clauses, which
#would have caught any exception at all, not only a missing argument
if len(sys.argv) < 4:
    fail()
#root DIRECTORY string
myDir = sys.argv[1]
#absolute form of DIRECTORY ending in the platform's path separator;
#joining with "" appends the separator only when it is not already there,
#so the prefix also matches abspath() results on Windows
rootDir = os.path.join(os.path.abspath(myDir), "")
#RECURSION flag, kept as the string typed on the command line
myRec = sys.argv[2]
#OUTPUT filename string
myFile = sys.argv[3]
#print is called with one parenthesised string throughout, which produces
#the same output under Python 2 and Python 3
print("--------------------------------------------")
print("gathering files")
print("--------------------------------------------")
#####
#making list of PHP files within DIRECTORY
if myRec == "0": #without recursion
    PHP_list = glob.glob(myDir + "/*.php")
elif myRec == "1": #with recursion
    PHP_list = []
    for dirname, dirnames, filenames in os.walk(myDir):
        for filename in filenames:
            if fnmatch.fnmatch(filename, "*.php"):
                PHP_list.append(os.path.join(dirname, filename))
else:
    #any other value used to leave PHP_list undefined, which crashed
    #further down with a NameError; stop here with a clear message instead
    sys.exit("ERROR: recursion argument must be 0 or 1, got " + repr(myRec))
#listing every file that will be scanned
for p in PHP_list:
    print(os.path.abspath(p))
print("--------------------------------------------")
print("searching includes")
print("--------------------------------------------")
#collecting the include relationships as a set of (source, target) tuples,
#so an edge that is found more than once is stored only once;
#source is the including PHP file and target the file it includes,
#each with the rootDir prefix removed when the path starts with it
includeList = set()
#both patterns are compiled once, outside the loops;
#the raw string avoids the invalid "\(" escape of a normal string literal
includeCall = re.compile(anything + dependency + anything + r"\((.*)\)")
quotedPath = re.compile(".*" + quotes + "(.*)" + quotes)
#iterate through each PHP file and place tuples in the set
for phpFile in PHP_list:
    #the with-block closes each file as soon as it has been scanned
    with open(phpFile, "r") as fileOpen:
        print("processing " + phpFile)
        source = os.path.abspath(phpFile)
        #strip rootDir only as a leading prefix; str.replace would also
        #delete the same text wherever it reappears deeper in the path
        if source.startswith(rootDir):
            source = source[len(rootDir):]
        #for each line in a PHP file
        for line in fileOpen:
            m = includeCall.match(line)
            if not m:
                continue
            #group 2 is the text captured between the parentheses
            m = quotedPath.match(m.group(2))
            if not m:
                continue
            #group 2 is the text captured between the quotes
            targetBasename = m.group(2)
            if not targetBasename.endswith(".php"): #only PHP files
                continue
            # the include statement checks the path given, otherwise goes for the current working directory and finally checks the include_path in the php.ini
            # assume that the given path is valid
            targetFile = os.path.abspath(os.path.dirname(phpFile)+"/"+targetBasename)
            # if it is not try simulating php's fallback to current working dir methods
            if not os.path.exists(targetFile):
                print(tab+" >>> ! >>> "+ targetFile + " doesn't exist, using")
                targetFile = os.path.abspath(rootDir+targetBasename)
                print(tab+targetFile+" instead")
                if not os.path.exists(targetFile):
                    print(tab+"no success. skipping include.")
                    continue
            if targetFile.startswith(rootDir):
                targetFile = targetFile[len(rootDir):]
            includeList.add((source, targetFile))
#print is called with one parenthesised argument throughout, which produces
#the same output under Python 2 and Python 3
print("--------------------------------------------")
print("writing output")
print("--------------------------------------------")
#creating DOT file
#the with-block closes the file even if one of the writes raises
with open(myFile, "w") as dot:
    #writing to DOT file
    dot.write("digraph dependencies {" + br)
    #sorted so that repeated runs over the same files give an identical
    #DOT file; the loop variable is named edge so that it does not
    #overwrite the module-level "dependency" regex component
    for edge in sorted(includeList):
        print(edge)
        #one edge statement per tuple:  <tab>"source" -> "target";
        dot.write(tab + "\"" + edge[0] + "\" -> \"" + edge[1] + "\";" + br)
    dot.write("}")
#####
#exiting
sys.exit()
carl
24 Sep 12 at 1:33 pm
Hey Carl, thanks for sharing this!
nitin
26 Sep 12 at 7:19 am