1.42k likes | 1.6k Views
Scala Parallel Collections. Aleksandar Prokopec EPFL. Scala collections. for { s <- surnames n <- names if s endsWith n } yield (n, s). McDonald. Scala collections. for { s <- surnames n <- names if s endsWith n } yield (n, s). 1040 ms.
E N D
Scala Parallel Collections Aleksandar Prokopec EPFL
Scala collections for { s <- surnames; n <- names; if s endsWith n } yield (n, s) McDonald
Scala collections for { s <- surnames; n <- names; if s endsWith n } yield (n, s) 1040 ms
Scala parallel collections for { s <- surnames; n <- names; if s endsWith n } yield (n, s)
Scala parallel collections for { s <- surnames.par; n <- names.par; if s endsWith n } yield (n, s)
Scala parallel collections for { s <- surnames.par; n <- names.par; if s endsWith n } yield (n, s) 2 cores 575 ms
Scala parallel collections for { s <- surnames.par; n <- names.par; if s endsWith n } yield (n, s) 4 cores 305 ms
for comprehensions surnames.par.flatMap { s => names.par .filter(n => s endsWith n) .map(n => (n, s)) }
for comprehensions: nested parallelized bulk operations surnames.par.flatMap { s => names.par .filter(n => s endsWith n) .map(n => (n, s)) }
Nested parallelism: parallel within parallel surnames.par.flatMap { s => surnameToCollection(s) // may invoke parallel ops } composition
Nested parallelism: going recursive def vowel(c: Char): Boolean = ...
Nested parallelism: going recursive def vowel(c: Char): Boolean = ... def gen(n: Int, acc: Seq[String]): Seq[String] = if (n == 0) acc
Nested parallelism: going recursive def vowel(c: Char): Boolean = ... def gen(n: Int, acc: Seq[String]): Seq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield recursive algorithms
Nested parallelism: going recursive def vowel(c: Char): Boolean = ... def gen(n: Int, acc: Seq[String]): Seq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c
Nested parallelism: going recursive def vowel(c: Char): Boolean = ... def gen(n: Int, acc: Seq[String]): Seq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c
Nested parallelism: going recursive def vowel(c: Char): Boolean = ... def gen(n: Int, acc: Seq[String]): Seq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c else s gen(5, Array(""))
Nested parallelism: going recursive def vowel(c: Char): Boolean = ... def gen(n: Int, acc: Seq[String]): Seq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c else s gen(5, Array("")) 1545 ms
Nested parallelism: going recursive def vowel(c: Char): Boolean = ... def gen(n: Int, acc: ParSeq[String]): ParSeq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c else s gen(5, ParArray(""))
Nested parallelism: going recursive def vowel(c: Char): Boolean = ... def gen(n: Int, acc: ParSeq[String]): ParSeq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c else s gen(5, ParArray("")) 1 core 1575 ms
Nested parallelism: going recursive def vowel(c: Char): Boolean = ... def gen(n: Int, acc: ParSeq[String]): ParSeq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c else s gen(5, ParArray("")) 2 cores 809 ms
Nested parallelism: going recursive def vowel(c: Char): Boolean = ... def gen(n: Int, acc: ParSeq[String]): ParSeq[String] = if (n == 0) acc else for (s <- gen(n - 1, acc); c <- 'a' to 'z') yield if (s.length == 0) s + c else if (vowel(s.last) && !vowel(c)) s + c else if (!vowel(s.last) && vowel(c)) s + c else s gen(5, ParArray("")) 4 cores 530 ms
Character count: use case for foldLeft val txt: String = ... txt.foldLeft(0) { case (a, ' ') => a case (a, c) => a + 1 }
Character count: use case for foldLeft txt.foldLeft(0) { case (a, ' ') => a case (a, c) => a + 1 } going left to right - not parallelizable! 2 4 3 5 1 0 6 A B C D E F _ + 1
Character count: use case for foldLeft txt.foldLeft(0) { case (a, ' ') => a case (a, c) => a + 1 } going left to right – not really necessary 6 _ + _ 2 1 0 3 A B C 0 2 3 1 D E F _ + 1 _ + 1
Character count: in parallel txt.fold(0) { case (a, ' ') => a case (a, c) => a + 1 }
Character count: in parallel txt.fold(0) { case (a, ' ') => a case (a, c) => a + 1 } : (Int, Char) => Int 2 1 1 3 A B C 3 2 1 1 A B C _ + 1
Character count: fold not applicable txt.fold(0) { case (a, ' ') => a case (a, c) => a + 1 } ! (Int, Int) => Int 3 3 _ + _ 3 2 1 3 A B C 3 2 1 3 A B C
Character count: use case for aggregate txt.aggregate(0)({ case (a, ' ') => a case (a, c) => a + 1 }, _ + _)
Character count: use case for aggregate txt.aggregate(0)({ case (a, ' ') => a case (a, c) => a + 1 }, _ + _) 3 3 _ + _ 3 1 1 2 A B C 3 2 1 3 A B C _ + 1
Character count: use case for aggregate txt.aggregate(0)({ case (a, ' ') => a case (a, c) => a + 1 }, _ + _) aggregation element 3 3 _ + _ 1 2 3 1 A B B C 3 2 3 1 A B C _ + 1
Character count: use case for aggregate txt.aggregate(0)({ case (a, ' ') => a case (a, c) => a + 1 }, _ + _) aggregation aggregation aggregation element 3 3 _ + _ 1 2 3 1 A B B C 3 2 3 1 A B C _ + 1
Word count: another use case for foldLeft txt.foldLeft((0, true)) { case ((wc, _), ' ') => (wc, true) case ((wc, true), x) => (wc + 1, false) case ((wc, false), x) => (wc, false) }
Word count: initial accumulation txt.foldLeft((0, true)) { case ((wc, _), ' ') => (wc, true) case ((wc, true), x) => (wc + 1, false) case ((wc, false), x) => (wc, false) } 0 words so far last character was a space “Folding me softly.”
Word count: a space txt.foldLeft((0, true)) { case ((wc, _), ' ') => (wc, true) case ((wc, true), x) => (wc + 1, false) case ((wc, false), x) => (wc, false) } last seen character is a space “Folding me softly.”
Word count: a non space txt.foldLeft((0, true)) { case ((wc, _), ' ') => (wc, true) case ((wc, true), x) => (wc + 1, false) case ((wc, false), x) => (wc, false) } last seen character was a space – a new word “Folding me softly.”
Word count: a non space txt.foldLeft((0, true)) { case ((wc, _), ' ') => (wc, true) case ((wc, true), x) => (wc + 1, false) case ((wc, false), x) => (wc, false) } last seen character wasn’t a space – no new word “Folding me softly.”
Word count: in parallel P1 P2 “Folding me ” “softly.”
Word count: in parallel P1 P2 “Folding me ” “softly.” wc = 2; rs = 1 wc = 1; ls = 0
Word count: in parallel P1 P2 “Folding me ” “softly.” wc = 2; rs = 1 wc = 1; ls = 0 wc = 3
Word count: must assume arbitrary partitions P1 P2 “Foldin” “g me softly.” wc = 1; rs = 0 wc = 3; ls = 0
Word count: must assume arbitrary partitions P1 P2 “Foldin” “g me softly.” wc = 1; rs = 0 wc = 3; ls = 0 wc = 3
Word count: initial aggregation txt.par.aggregate((0, 0, 0))
Word count: initial aggregation txt.par.aggregate((0, 0, 0)) # spaces on the left # words # spaces on the right
Word count: initial aggregation txt.par.aggregate((0, 0, 0)) # spaces on the left # words # spaces on the right “”
Word count: aggregation aggregation ... }, { case ((0, 0, 0), res) => res case (res, (0, 0, 0)) => res “” “softly.” “Folding me” “”