#Import SparkContext from pyspark
from pyspark import SparkContext
sc = SparkContext()
from operator import add
rdd1 = sc.parallelize([("a", 1), ("b", 1), ("a", 1)])
sorted(rdd1.reduceByKey(add).collect())
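For reference, reduceByKey(add) sums the values that share a key, so the expression above evaluates to [('a', 2), ('b', 1)].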
# Use the raw file URL; the /blob/ page URL returns HTML, not CSV
!curl -L https://raw.githubusercontent.com/fivethirtyeight/data/master/daily-show-guests/daily_show_guests.csv -o daily.csv
!head -10 daily.csv
raw = sc.textFile("daily.csv")
raw.take(5)
daily = raw.map(lambda line: line.split(','))
daily.take(5)
# Aggregate total count of guests per year (x[0] is the YEAR column)
tally = daily.map(lambda x: (x[0], 1)) \
             .reduceByKey(lambda x, y: x + y)
print(tally)
# Because Spark is lazy, we need to perform an action on the RDD to see results
tally.take(tally.count())
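Note that sc.textFile keeps the CSV header line, so the tally above will also include a ('YEAR', 1) pair (assuming the file's first column header is YEAR, as in the FiveThirtyEight dataset). A minimal sketch that drops the header before counting:
daily_rows = daily.filter(lambda x: x[0] != 'YEAR')   # drop the header row
tally = daily_rows.map(lambda x: (x[0], 1)) \
                  .reduceByKey(lambda x, y: x + y)
tally.collect()   # collect() returns the whole (small) tally at once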
Question: How do I sort the tally by year?
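One way, assuming the year keys are the strings parsed above: sortByKey() returns a new RDD of (year, count) pairs ordered by key.
tally.sortByKey().collect()   # sort by year on the cluster
# or, equivalently for a small result, sort on the driver:
sorted(tally.collect())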
 