Browse Source

much faster packages from ghc and monad-par

Make getting the pacakge set from GHC much faster by rather parsing the
package dump and also switch to monad-par for parallelism. Formatting
imports in a large repository whent from 30s to 3s.
master
Handré Stolp 2 years ago
parent
commit
4c48e06047
5 changed files with 230 additions and 206 deletions
  1. +21
    -7
      ch-hs-imports.cabal
  2. +12
    -10
      default.nix
  3. +8
    -1
      hie.yaml
  4. +2
    -0
      shell.nix
  5. +187
    -188
      src/Main.hs

+ 21
- 7
ch-hs-imports.cabal View File

@@ -11,22 +11,36 @@ extra-source-files: CHANGELOG.md
executable ch-hs-imports
main-is: Main.hs
build-depends :
async ^>= 2.2.0,
abstract-par ^>= 0.3.3,
base ^>= 4.12.0.0,
containers ^>= 0.6.0.1,
deepseq ^>= 1.4.4.0,
directory ^>= 1.3.3.0,
dlist ^>= 0.8,
filepath ^>= 1.4.2.1,
megaparsec ^>= 7.0.5,
monad-par ^>= 0.3.4.8,
monad-par-extras ^>= 0.3.3,
monoidal-containers ^>= 0.4.0.0,
mtl ^>= 2.2.2,
nonempty-containers ^>= 0.3.1.0,
optparse-applicative >= 0.14 && < 0.16,
streaming ^>= 0.2.2.0,
streaming-bytestring ^>= 0.1.6,
streaming-process ^>= 0.1.0.0,
process ^>= 1.6.5.0,
text ^>= 1.2.3.1,
transformers ^>= 0.5.6.2,
unliftio-core ^>= 0.1.2.0
transformers ^>= 0.5.6.2
hs-source-dirs: src
default-language: Haskell2010
ghc-options: -Wall -fwarn-incomplete-uni-patterns -threaded -O
ghc-options:
-O2
-threaded
-rtsopts
-with-rtsopts=-N
-Werror
-Wall
-Wcompat
-Widentities
-Wincomplete-record-updates
-Wincomplete-uni-patterns
-Wpartial-fields
-Wredundant-constraints

+ 12
- 10
default.nix View File

@@ -1,21 +1,22 @@
{ mkDerivation
, async
, abstract-par
, base
, containers
, deepseq
, directory
, dlist
, filepath
, megaparsec
, monad-par
, monad-par-extras
, monoidal-containers
, mtl
, nonempty-containers
, optparse-applicative
, process
, stdenv
, streaming
, streaming-bytestring
, streaming-process
, text
, transformers
, unliftio-core
}:
mkDerivation {
pname = "ch-hs-imports";
@@ -24,22 +25,23 @@ mkDerivation {
isLibrary = false;
isExecutable = true;
executableHaskellDepends = [
async
abstract-par
base
containers
deepseq
directory
dlist
filepath
megaparsec
monad-par
monad-par-extras
monoidal-containers
mtl
nonempty-containers
optparse-applicative
streaming
streaming-bytestring
streaming-process
process
text
transformers
unliftio-core
];
license = stdenv.lib.licenses.mit;
}

+ 8
- 1
hie.yaml View File

@@ -2,6 +2,13 @@ cradle:
direct:
arguments:
- -Wall
- -fwarn-incomplete-uni-patterns
- -Wcompat
- -Widentities
- -Wincomplete-record-updates
- -Wincomplete-uni-patterns
- -Wpartial-fields
- -Wredundant-constraints
- -isrc
- -package-env - # so that .ghci-environment.* generated by cabal is not used



+ 2
- 0
shell.nix View File

@@ -20,6 +20,8 @@ let
directory
async
nonempty-containers
monad-par
monad-par-extras
]
);
# the dependencies available in the shell


+ 187
- 188
src/Main.hs View File

@@ -7,16 +7,17 @@
{-# language ApplicativeDo #-}
{-# language DuplicateRecordFields #-}
{-# language TypeFamilies #-}
{-# language DeriveGeneric #-}
{-# language DeriveAnyClass #-}

import Control.Concurrent.Async (mapConcurrently, forConcurrently_)
-- abstract-par
import Control.Monad.Par.Class (ParFuture)

-- base
import Control.Applicative hiding (some, many)
import qualified Control.Applicative as Applicative
import Control.Arrow ((>>>))
import Control.Monad
import Control.Monad.IO.Class
import Data.Bifunctor (first, second, bimap)
import qualified Data.Char as Char
import Data.Foldable
@@ -28,11 +29,17 @@ import qualified Data.List.NonEmpty as NonEmpty
import Data.Maybe
import Data.Tuple (swap)
import Data.Void
import GHC.Generics (Generic)
import Prelude hiding (takeWhile)
import System.IO (stderr, hPutStrLn)

-- containers
import Data.Set (Set)
import qualified Data.Set as Set

-- deepseq
import Control.DeepSeq

-- directory
import System.Directory

@@ -46,10 +53,19 @@ import System.FilePath.Posix
import Text.Megaparsec
import Text.Megaparsec.Char

-- monad-par
import Control.Monad.Par.IO (runParIO)

-- monad-par-extras
import Control.Monad.Par.Combinator (parMapM)

-- monoidal-containers
import Data.Map.Monoidal (MonoidalMap)
import qualified Data.Map.Monoidal as MonoidalMap

-- mtl
import Control.Monad.Trans

-- nonempty-containers
import qualified Data.Set.NonEmpty as NESet
import Data.Set.NonEmpty (NESet)
@@ -57,21 +73,12 @@ import Data.Set.NonEmpty (NESet)
-- optparse-applicative
import qualified Options.Applicative as Options

import qualified Streaming as S
import qualified Streaming.Prelude as S

import qualified Data.ByteString.Streaming as SB
import qualified Data.ByteString.Streaming.Char8 as SB

import Streaming.Process (withStreamingCommand)
-- process
import System.Process (runInteractiveCommand)

-- text
import qualified Data.Text as Text
import Data.Text (Text)
import qualified Data.Text.Encoding as Text
import qualified Data.Text.IO as Text
import qualified Data.Text.Lazy as Text (toStrict)
import qualified Data.Text.Lazy.Builder as TextBuilder
@@ -79,13 +86,10 @@ import qualified Data.Text.Lazy.Builder as TextBuilder
-- transformers
import Control.Monad.Trans.Maybe

import Control.Monad.IO.Unlift

data Options = Options
{ overwrite :: Bool
, localModulesFromCurrentDir :: Bool
, modulesFromAllGhcPkgs :: Bool
, reportProgress :: Bool
, filePaths :: [String]
}
@@ -106,14 +110,6 @@ options = do
& mconcat
& Options.switch

modulesFromAllGhcPkgs <-
[ Options.long "modules-from-all-ghc-pkgs"
, Options.help "Get the module set once at the start from all the packages registered with ghc. A lot faster."
]
& mconcat
& Options.switch


reportProgress <-
[ Options.long "report-progress"
, Options.help "When doing multiple files report progress along the way"
@@ -143,18 +139,18 @@ main = do
& Options.info (options <**> Options.helper)
)

groupFileImports porgramOptions
runParIO (groupFileImports porgramOptions)


groupFileImports
:: ( MonadUnliftIO m
:: ( MonadIO m
, ParFuture iv m
)
=> Options -> m ()
groupFileImports
Options
{ overwrite
, localModulesFromCurrentDir
, modulesFromAllGhcPkgs
, reportProgress
, filePaths
} = do
@@ -163,14 +159,11 @@ groupFileImports
reportProgress
(liftIO $ putStrLn $ "processing " ++ show (length filePaths) ++ " number of files.")

(globalModulesFromAllGhcPackages, determineModulePackage_) <-
if modulesFromAllGhcPkgs
then
(, \ a b c d -> pure $ determineModulePackage' a b c d) <$> getModuleMapFromGhc
<* when
reportProgress
(liftIO $ putStrLn "got all exposed modules from GHC package set")
else pure (MonoidalMap.empty, determineModulePackage)
globalModulesFromAllGhcPackages <-
liftIO getPackageModulesFromGhcDump
<* when
reportProgress
(liftIO $ putStrLn "got all exposed modules from GHC package set")

localModulesCWD <-
if localModulesFromCurrentDir
@@ -182,47 +175,56 @@ groupFileImports
else
pure (Set.empty, MonoidalMap.empty)

runInIO <- askRunInIO
liftIO $ forConcurrently_ filePaths $ \filePath -> runInIO $ do
absoluteFilePath_ <- absoluteFilePath filePath
fileText <- liftIO $ Text.readFile filePath
(localPackages_, moduleMap) <-
( if localModulesFromCurrentDir
then
pure localModulesCWD

else
( (<>)
<$> localModules filePath
<*> projectModules filePath
)
<* when
reportProgress
(liftIO $ putStrLn $ "got local modules relative to file path: " ++ filePath)

) <&> second (<> globalModulesFromAllGhcPackages)

let outPutResult =
if overwrite
then \newFileText ->
when
(newFileText /= fileText)
(Text.writeFile filePath newFileText)
else Text.putStr

case second (extractImports . contentSplitTrailingBlankLines) $ parse parseConent filePath fileText of
Left e ->
liftIO $ hPutStrLn stderr (unlines ["Failed parsing:", errorBundlePretty e, "for file: " ++ filePath])

Right ExtractImports{beforeImports, imports, afterImports} ->
do groupedImports <-
liftIO $ groupImportsByPackage <$> traverse (determineModulePackage_ absoluteFilePath_ localPackages_ moduleMap) imports

[beforeImports, groupedImports, afterImports] & mconcat & contentToText & outPutResult & liftIO

when
reportProgress
(liftIO $ putStrLn $ "finished: " ++ filePath)
void
$ filePaths
& parMapM
(\filePath -> do
absoluteFilePath_ <- absoluteFilePath filePath
fileText <- liftIO $ Text.readFile filePath
(localPackages_, moduleMap) <-
( if localModulesFromCurrentDir
then
pure localModulesCWD

else
( (<>)
<$> localModules filePath
<*> projectModules filePath
)
<* when
reportProgress
(liftIO $ putStrLn $ "got local modules relative to file path: " ++ filePath)

) <&> second (<> globalModulesFromAllGhcPackages)

let outPutResult =
if overwrite
then \newFileText ->
when
(newFileText /= fileText)
(Text.writeFile filePath newFileText)
else Text.putStr

case second (extractImports . contentSplitTrailingBlankLines) $ parse parseConent filePath fileText of
Left e ->
liftIO $ hPutStrLn stderr (unlines ["Failed parsing:", errorBundlePretty e, "for file: " ++ filePath])

Right ExtractImports{beforeImports, imports, afterImports} ->
let groupedImports =
imports
& map (determineModulePackage absoluteFilePath_ localPackages_ moduleMap)
& groupImportsByPackage

in [beforeImports, groupedImports, afterImports]
& mconcat
& contentToText
& outPutResult
& liftIO

when
reportProgress
(liftIO $ putStrLn $ "finished: " ++ filePath)
)


contentSplitTrailingBlankLines :: [Content] -> [Content]
@@ -257,10 +259,10 @@ splitTrailingBlankLines =
>>> swap

data ExtractImports = ExtractImports
{ beforeImports :: [Content]
, imports :: [Import]
, afterImports :: [Content]
} deriving (Show, Eq, Ord)
{ beforeImports :: ![Content]
, imports :: ![Import]
, afterImports :: ![Content]
} deriving (Show, Eq, Ord, Generic, NFData)

extractImports :: [Content] -> ExtractImports
extractImports xs0 =
@@ -313,24 +315,24 @@ contentToText =
AOtherLine (OtherLine t) -> t

data Import = Import
{ content :: Text
, packageName :: Maybe Text
, moduleName :: ModuleName
{ content :: !Text
, packageName :: !(Maybe Text)
, moduleName :: !ModuleName
}
deriving (Show, Eq, Ord)
deriving (Show, Eq, Ord, Generic, NFData)

newtype SingleLineComment = SingleLineComment Text deriving (Show, Eq, Ord)
newtype SingleLineComment = SingleLineComment Text deriving (Show, Eq, Ord, Generic, NFData)

newtype BlankLine = BlankLine Text deriving (Show, Eq, Ord)
newtype BlankLine = BlankLine Text deriving (Show, Eq, Ord, Generic, NFData)

newtype OtherLine = OtherLine Text deriving (Show, Eq, Ord)
newtype OtherLine = OtherLine Text deriving (Show, Eq, Ord, Generic, NFData)

data Content
= ASingleLineComment SingleLineComment
| ABlankLine BlankLine
| AImport Import
| AOtherLine OtherLine
deriving (Show, Eq, Ord)
= ASingleLineComment !SingleLineComment
| ABlankLine !BlankLine
| AImport !Import
| AOtherLine !OtherLine
deriving (Show, Eq, Ord, Generic, NFData)

type Parser = Parsec Void Text

@@ -469,41 +471,30 @@ bol = (<?> "bol") $ do
guard (pos == pos1)

data LocalPackage = LocalPackage
{ packageName :: PackageName
, pathToCabalFile :: AbsoluteFilePath
, cabalPackageDependencies :: [PackageName]
} deriving (Show, Eq, Ord)
{ packageName :: !PackageName
, pathToCabalFile :: !AbsoluteFilePath
, cabalPackageDependencies :: ![PackageName]
} deriving (Show, Eq, Ord, Generic, NFData)

data PackageSource
= ALocalPackage LocalPackage
| AGlobalPackage PackageName
deriving (Show, Eq, Ord)
= ALocalPackage !LocalPackage
| AGlobalPackage !PackageName
deriving (Show, Eq, Ord, Generic, NFData)

newtype PackageName = PackageName {getPackageName :: Text} deriving (Eq, Ord, Show)
newtype PackageName = PackageName {getPackageName :: Text} deriving (Eq, Ord, Show, Generic, NFData)

packageNameFromSource :: PackageSource -> PackageName
packageNameFromSource = \case
AGlobalPackage packageName -> packageName
ALocalPackage LocalPackage{packageName} -> packageName

dropPackageVersion :: Text -> Text
dropPackageVersion t =
let idx =
t
& Text.reverse
& Text.findIndex ('-' ==)
& fromMaybe 1
& (\i -> Text.length t - i - 1)

in t & Text.splitAt idx & fst

determineModulePackage'
determineModulePackage
:: AbsoluteFilePath
-> Set LocalPackage
-> MonoidalMap ModuleName (NESet PackageSource)
-> Import
-> (Import, PackageName)
determineModulePackage' absoluteFilePath_ localPackages_ localModules_ i@Import{moduleName}
determineModulePackage absoluteFilePath_ localPackages_ localModules_ i@Import{moduleName}
| Import{packageName = Just packageName } <- i
= (i, PackageName packageName)

@@ -513,34 +504,6 @@ determineModulePackage' absoluteFilePath_ localPackages_ localModules_ i@Import{

| otherwise = (i, PackageName "")

determineModulePackage
:: AbsoluteFilePath
-> Set LocalPackage
-> MonoidalMap ModuleName (NESet PackageSource)
-> Import
-> IO (Import, PackageName)
determineModulePackage absoluteFilePath_ localPackages_ localModules_ import_@Import{moduleName} = do
fromFindModule <-
withStreamingCommand
("ghc-pkg find-module " <> Text.unpack (unModuleName moduleName) <> " --simple-output")
SB.empty
( SB.words
>>> S.mapped (SB.foldlChunks (<>) "")
>>> S.map
((moduleName,)
. NESet.singleton
. AGlobalPackage
. PackageName
. dropPackageVersion
. Text.decodeUtf8
)
>>> S.hoist SB.effects
>>> S.toList
)
<&> MonoidalMap.fromListWith (<>) . S.fst'

pure $ determineModulePackage' absoluteFilePath_ localPackages_ (localModules_ <> fromFindModule) import_

pickPackage :: AbsoluteFilePath -> Set LocalPackage -> NESet PackageSource -> PackageName
pickPackage (AbsoluteFilePath filePath) localPackages_ = NESet.toList >>> \case
(matchOnePackage :| []) ->
@@ -639,10 +602,11 @@ findEnclosingFile fileMatcher = absoluteFilePath >=> runMaybeT . go
byExtension :: String -> FilePath -> Bool
byExtension extension = and . sequence [isExtensionOf extension, not . List.null . takeBaseName]

newtype ModuleName = ModuleName {unModuleName :: Text} deriving (Show, Eq, Ord)
newtype ModuleName = ModuleName {unModuleName :: Text} deriving (Show, Eq, Ord, Generic, NFData)

projectModules
:: ( MonadUnliftIO m
:: ( MonadIO m
, ParFuture iv m
)
=> FilePath -> m (Set LocalPackage, MonoidalMap ModuleName (NESet PackageSource))
projectModules filePath = do
@@ -661,7 +625,8 @@ projectModules filePath = do


localModules
:: ( MonadUnliftIO m
:: ( MonadIO m
, ParFuture iv m
)
=> FilePath -> m (Set LocalPackage, MonoidalMap ModuleName (NESet PackageSource))
localModules filePath = do
@@ -690,7 +655,7 @@ localModules filePath = do
& liftIO

gatherFiles ".hs" (takeDirectory cabalPath)
>>= liftIO . mapConcurrently (fmap (either (const Nothing) Just . parse parseModuleAndModuleName "") . Text.readFile . unAbsoluteFilePath)
>>= parMapM (fmap (either (const Nothing) Just . parse parseModuleAndModuleName "") . (liftIO . Text.readFile) . unAbsoluteFilePath)
<&> mapMaybe
(fmap $ \moduleName ->
let localPackage =
@@ -719,14 +684,14 @@ localModules filePath = do
pure (Set.empty, MonoidalMap.empty)

gatherFiles
:: ( MonadUnliftIO m
:: ( MonadIO m
, ParFuture iv m
)
=> String -> FilePath -> m [AbsoluteFilePath]
gatherFiles extension =
absoluteFilePath >=> fmap DList.toList . go
where
go (AbsoluteFilePath filePath) = do
runInIO <- askRunInIO
go (AbsoluteFilePath filePath) =
fmap (fromMaybe DList.empty)
. runMaybeT
$ do
@@ -741,7 +706,7 @@ gatherFiles extension =
liftIO (doesDirectoryExist filePath) >>= guard
liftIO (listDirectory filePath)
<&> map (AbsoluteFilePath . (filePath </>))
>>= liftIO . mapConcurrently (runInIO . go)
>>= lift . parMapM go
<&> mconcat

checkFile = do
@@ -760,53 +725,11 @@ parseModuleAndModuleName = moduleName <|> (parseRestOfLine >> parseModuleAndModu
>> some spaceChar
>> parseModuleName


getModuleMapFromGhc
:: ( MonadUnliftIO m
)
=> m (MonoidalMap ModuleName (NESet PackageSource))
getModuleMapFromGhc =
liftIO getPackagesFromGhc
>>= liftIO
. mapConcurrently
( \packageName ->
map (, NESet.singleton (AGlobalPackage packageName))
<$> getPackageModulesFromGhc packageName
)
<&> MonoidalMap.fromListWith (<>) . concat


getPackagesFromGhc :: IO [PackageName]
getPackagesFromGhc =
S.fst'
<$> withStreamingCommand
"ghc-pkg list --simple-output"
SB.empty
( SB.words
>>> S.mapped (SB.foldlChunks (<>) "")
>>> S.map (PackageName . dropPackageVersion . Text.decodeUtf8)
>>> S.hoist SB.effects
>>> S.toList
)

getPackageModulesFromGhc :: PackageName -> IO [ModuleName]
getPackageModulesFromGhc (PackageName packageName)=
S.fst'
<$> withStreamingCommand
("ghc-pkg field " <> Text.unpack packageName <> " exposed-modules --simple-output")
SB.empty
( SB.words
>>> S.mapped (SB.foldlChunks (<>) "")
>>> S.map (ModuleName . Text.decodeUtf8)
>>> S.hoist SB.effects
>>> S.toList
)

parseRestOfLine :: Parser Text
parseRestOfLine =
(<?> "restOfLine") $ Text.pack <$> manyTill (printChar <|> char '\t') (void eol)

newtype AbsoluteFilePath = AbsoluteFilePath {unAbsoluteFilePath :: FilePath} deriving (Show, Eq, Ord)
newtype AbsoluteFilePath = AbsoluteFilePath {unAbsoluteFilePath :: FilePath} deriving (Show, Eq, Ord, Generic, NFData)

absoluteFilePath :: (MonadIO m) => FilePath -> m AbsoluteFilePath
absoluteFilePath = fmap AbsoluteFilePath . liftIO . makeAbsolute
@@ -857,3 +780,74 @@ parseCabalDependencies = do
<|> (char ',' >> space)
)
) <?> "package name"

getPackageModulesFromGhcDump :: IO (MonoidalMap ModuleName (NESet PackageSource))
getPackageModulesFromGhcDump = do
(_, hOut, _, _) <-
runInteractiveCommand "ghc-pkg dump"
Text.hGetContents hOut
<&> parse parsePackageDump ""
>>= \case
Left e ->
putStrLn
("error parsing ghc package dump: " ++ errorBundlePretty e)
>> pure MonoidalMap.empty

Right x ->
pure x

parsePackageDump :: Parser (MonoidalMap ModuleName (NESet PackageSource))
parsePackageDump =
some parsePackageDumpPackage <&> mconcat


parsePackageDumpPackage :: Parser (MonoidalMap ModuleName (NESet PackageSource))
parsePackageDumpPackage = do
package_ <-
NESet.singleton
. AGlobalPackage
. PackageName
. Text.pack
<$> parseName

(moduleNames, skipTillEndOfPackage) <-
skipSomeTill
(try skipLine)
( asum
[ (,True) <$> try parseExposedModules
, ([], False) <$ try packageEnd
]
)

when
skipTillEndOfPackage
(void $ skipManyTill (try skipLine) packageEnd)

pure
( moduleNames
& map (, package_)
& MonoidalMap.fromListWith (<>)
)

where
skipLine = manyTill (printChar <|> char '\t') eol

parseName =
string "name:" >> skipSome nonEolSpaceChar >> someTill printChar eol

parseExposedModules =
string "exposed-modules:"
>> eol
>> some
( try
$ skipSome nonEolSpaceChar
>> sepBy1 parseModuleName nonEolSpaceChar
<* eol
)
<&> concat

packageEnd = try (string "---" >> (void eol <|> eof)) <|> eof

nonEolSpaceChar :: Parser ()
nonEolSpaceChar =
void $ notFollowedBy eol >> (spaceChar <|> char '\t')

Loading…
Cancel
Save