-
Notifications
You must be signed in to change notification settings - Fork 0
/
queries4movies.py
151 lines (114 loc) · 5.33 KB
/
queries4movies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
from neo4j import GraphDatabase, basic_auth
# connection with authentication
driver = GraphDatabase.driver("bolt://localhost", auth=basic_auth("neo4j", "password"), encrypted=False)
# connection without authentication
# driver = GraphDatabase.driver("bolt://localhost", encrypted=False)
session = driver.session()
transaction = session.begin_transaction()
write_output = open("output.txt", "w")
# 1.) List the first 20 actors in descending order of the number of films they acted in.
# OUTPUT: actor_name, number_of_films_acted_in
write_output.write("### Q1 ###\n")
result = transaction.run("""
MATCH (a:Actor)-[:ACTS_IN]->(m:Movie)
WITH a, collect(m) AS movies
RETURN a.name, length(movies)
ORDER BY length(movies) DESC
LIMIT 20;
""")
for record in result:
write_output.write(record['a.name'] + ',' + str(record['length(movies)']) + '\n')
write_output.write("\n")
# 2.) List the titles of all movies with a review with at most 3 stars.
# OUTPUT: movie title
write_output.write("### Q2 ###\n")
result = transaction.run("""
MATCH (u) -[r:RATED]-> (m:Movie)
WHERE r.stars <= 3
RETURN m.title as mt
""")
for record in result:
write_output.write(record['mt'] + '\n')
write_output.write("\n")
# 3.) Find the movie with the largest cast, out of the list of movies that have a review.
# OUTPUT: movie_title, number_of_cast_members
write_output.write("### Q3 ###\n")
result = transaction.run("""
MATCH (a:Actor)-[:ACTS_IN]->(m:Movie)
MATCH (u)-[r:RATED]->(m:Movie)
WITH collect(DISTINCT a) as actors, m
RETURN m.title, length(actors)
ORDER BY length(actors) DESC
LIMIT 1;
""")
for record in result:
write_output.write(record['m.title'] + ', ' + str(record['length(actors)']) + '\n')
write_output.write("\n")
# 4.) Find all the actors who have worked with at least 3 different directors (regardless of how many movies
# they acted in). For example, 3 movies with one director each would satisfy this (provided the directors
# where different), but also a single movie with 3 directors would satisfy it as well.
# OUTPUT: actor_name, number_of_directors_he/she_has_worked_with
write_output.write("### Q4 ###\n")
result = transaction.run("""
MATCH (actor:Actor)-[:ACTS_IN]->(movie:Movie)<-[:DIRECTED]-(director:Director)
WITH actor, collect(DISTINCT director) AS num_directors WHERE length(num_directors) >= 3
RETURN actor.name AS actor_name, length(num_directors) AS `num_directors`
""")
for record in result:
write_output.write(record['actor_name'] + ', ' + str(record['num_directors']) + '\n')
write_output.write("\n")
# 5.) The Bacon number of an actor is the length of the shortest path between the actor and Kevin Bacon
# in the "co-acting" graph. That is, Kevin Bacon has Bacon number 0; all actors who acted in the same
# movie as him have Bacon number 1; all actors who acted in the same film as some actor with Bacon
# number 1 have Bacon number 2, etc. List all actors whose Bacon number is exactly 2 (first name, last name).
# You can familiarize yourself with the concept, by visiting The Oracle of Bacon.
# OUTPUT: actor_name
write_output.write("### Q5 ###\n")
result = transaction.run("""
MATCH (bacon2:Actor)-[:ACTS_IN]->(movie2:Movie)<-[:ACTS_IN]-(bacon1:Actor)-[:ACTS_IN]->(movie:Movie)<-[:ACTS_IN]-(bacon:Actor {name: 'Kevin Bacon'})
WITH collect(distinct bacon1.name) as b1n, collect(distinct bacon2.name) as b2n
RETURN FILTER( n IN b2n WHERE NOT n IN b1n ) as bacon2_exclusive
""")
for record in result:
for i in record['bacon2_exclusive']:
write_output.write(i + '\n')
write_output.write("\n")
# 6.) List which genres have movies where Tom Hanks starred in.
# OUTPUT: genre
write_output.write("### Q6 ###\n")
result = transaction.run("""
MATCH (a:Actor)-[:ACTS_IN]->(m:Movie)
WHERE a.name = "Tom Hanks"
RETURN DISTINCT m.genre;
""")
for record in result:
write_output.write(record['m.genre'] + '\n')
write_output.write("\n")
# 7.) Show which directors have directed movies in at least 2 different genres.
# OUTPUT: director name, number of genres
write_output.write("### Q7 ###\n")
result = transaction.run("""
MATCH (d:Director)-[:DIRECTED]->(m:Movie)
WITH collect(DISTINCT m.genre) as genres, d
WHERE length(genres) > 1
RETURN d.name, length(genres)
ORDER BY length(genres) DESC
""")
for record in result:
write_output.write(record['d.name'] + ', ' + str(record['length(genres)']) + '\n')
write_output.write("\n")
# 8.) Show the top 5 pairs of actor, director combinations, in descending order of frequency of occurrence.
# OUTPUT: director's name, actors' name, number of times director directed said actor in a movie
write_output.write("### Q8 ###\n")
result = transaction.run("""
MATCH (a:Actor)-[:ACTS_IN]->(m:Movie)<-[:DIRECTED]-(d:Director)
WITH d, a, collect((a:Actor)-[:ACTS_IN]->()<-[:DIRECTED]-(d:Director)) as unique_pair
RETURN d.name, a.name, size(unique_pair) as director_actor_pair
ORDER BY director_actor_pair DESC
LIMIT 5
""")
for record in result:
write_output.write(record['d.name'] + ', ' + record['a.name'] + ', ' + str(record['director_actor_pair']) + '\n')
transaction.close()
write_output.close()
session.close()