buildProtein.lua

#!/usr/bin/env luajit

--[[
   buildProtein.lua

Copyright 2016, 2017 Robert T. Miller

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use these files except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
]]

--- buildProtein.lua [ options ] <PDB file | PIC file> ...
--
-- Default action is to read a PDB file and print corresponding 'Protein
-- Internal Coordinates' (PIC) data, or a PIC file and print corresponding
-- PDB format output.
--
-- options:
--
-- -f=<list of PDB codes> : read PDB codes and optional chain IDs
--                            from file and use to form filename to read
--                            from PDB_repository (location specified below),
--                            e.g. 7RSA becomes
--                            /media/data/pdb/rs/pdb7rsa.ent.gz
-- -w : instead of printing, write output to input filename with '.pic'.
--                            or '.gpdb' (generated PDB) extension
-- -t : test mode - read input file, generate PDB data or internal
--                            coordinates, attempt to re-generate input file,
--                            compare to input and report initial differences
--                            if any
--
-- Using the -t and -f option with a cullpdb_pc20_res2.2_R1.0.curr from May, 2016
-- (http://dunbrack.fccc.edu/PISCES.php -- Dunbrack Lab PISCES server),
-- 97.5% of 5,825 protein chains are regenerated correctly (backbone and sidechain
-- atoms) according to the line-by-line comparison test.



local parsers = require 'rfold.parsers'
local protein = require "rfold.protein"
local utils = require 'rfold.utils'

local PDB_repository_base = '/media/data/pdb/'

local args = parsers.parseCmdLine(
   '[ <pdbid[chain id]> | <pdb file> | <pic file> ] ...',
   '\n convert PDB files to internal coordinates and vice-versa. \n PDBid[chain id] file (/xx/<pdbid>.ent.gz) located under ' .. (PDB_repository_base and PDB_repository_base or '[not configured]') .. " \n 'pic file' in 'protein internal coordinates' format as generated by this program.\n",
   {
--      ['a'] = 'average: generate hedron_default.lua and dihedron_default.lua files of average values from input files',
      ['t'] = 'test: convert (PDB|internal_coordinates) input to (internal_coordinates|PDB) and back again, verify match',
      ['w'] = 'write: converted result to <pdbid>.pdb or <pdbid>.pic in current directory'
   },
   {
      ['f'] = '<input file> : process files listed in <input file> (1 per line) followed by any on command line'
   }
)

local toProcess={}
if args['f'] then
   for i,f in ipairs(args['f']) do
      local fh=io.open(f)
      for line in fh:lines() do
         table.insert(toProcess, line)
      end
   end
end

for i,a in ipairs(args) do table.insert(toProcess, a) end

for i,a in ipairs(toProcess) do
   if a:match('^IDs%s') then toProcess[i]='' end     -- header line for Dunbrack list : 'IDs         length Exptl.  resolution  R-factor FreeRvalue'
   if a:match('^#') then toProcess[i]='' end         -- comment line starts with '#'
   if a:ematch('^(%d%w%w%w)(%w?)%s+') then           -- looks like pdbcode with optional chain, read as compressed file from PDB_repository_base
      local pdbid, chn = _1:lower(), _2
      local subdir = pdbid:match('^%w(%w%w)%w$')
      toProcess[i] = PDB_repository_base .. '/' .. subdir .. '/pdb' .. pdbid:lower() .. '.ent.gz'
      if (chn) then
         toProcess[i] = toProcess[i] .. ' ' .. chn
      end
   end
end

for i,arg in ipairs(toProcess) do
   if '' ~= arg then
      --print(arg)
      if 'quit' == arg then os.exit() end    -- so can insert 'quit' in input file of PDB IDs for testing and debugging
      local file,chain = arg:match('^(%S+)%s?(%w?)%s*$')
      if chain == '' then chain = nil end
      --print(file,chain,arg)
      local pdbid = parsers.parseProteinData(file, function (t) protein.load(t) end, chain)   -- read protein data file either PDB or pic format into object in in-memory table 'proteins' (purged at end of loop)
      local prot = protein.get(pdbid)  -- select protein object from proteins table

      prot:setStartCoords()            -- copy first residue N, CA, C coordinates from input data to each chain residue 1 initCoords list

      if prot:countDihedra() > 0 then  -- did read internal coordinates (pic format data), so generate PDB

         --prot:setStartCoords()               -- copy residue 1 N, CA, C coordinates from input data to each chain residue 1 initCoords list
         if args['a'] then
            -- not yet implemented
            --hedron_default = prot:addHedronData(hedron_default)
            --dihedron_default = prot:addDihedronData(dihedron_default)
         elseif args['t'] then
            io.write('testing ' .. arg .. ' ... ')
            io.flush()
            local s0 = prot:writeInternalCoords()  -- get output for internal coordinate data as loaded
            prot:internalToAtomCoords()    -- generate PDB atom coordinates from internal coordinates
            prot:clearInternalCoords()          -- wipe internal coordinates as loaded
            prot:atomsToInternalCoords()    -- generate internal coordinates from PDB atom coordinates
            local s1 = prot:writeInternalCoords()  -- get output for internal coordinate data as generated
            local c = utils.lineByLineCompare(s0,s1)
            if c then
               print('passed. ' .. c .. ' pic lines compared, ' .. prot:report())
            else
               print('failed')
            end
         else
            prot:internalToAtomCoords()
            local s = prot:writePDB()              -- get PDB format text
            if args['w'] then
               utils.writeFile(pdbid .. '.gpdb',s)
            else
               print(s)
            end
         end

      else                             -- did read PDB format data, generate internal coordinates
         --prot:setStartCoords()            -- copy first residue N, CA, C coordinates from input data to each chain residue 1 initCoords list

         prot:atomsToInternalCoords()          -- calculate bond lengths, bond angles and dihedral angles from input coordinates
         if args['a'] then
            --hedron_default = prot:addHedronData(hedron_default)
            --dihedron_default = prot:addDihedronData(dihedron_default)
         elseif args['t'] then
            io.write('testing ' .. arg .. ' ... ')
            io.flush()
            local s0 = prot:writePDB(true)     -- PDB format text without REMARK RFOLD record (timestamp may not match)
            prot:clearAtomCoords()
            prot:internalToAtomCoords()
            local s1 = prot:writePDB(true)
            local c = utils.lineByLineCompare(s0,s1)
            if c then
               print('passed: ' .. c .. ' pdb lines compared, ' .. prot:report())
            else
               print('failed')
            end
         else
            local s = prot:writeInternalCoords()
            if args['w'] then
               utils.writeFile(pdbid .. '.pic',s)
            else
               print(s)
            end
         end
      end
      protein.drop(pdbid)
   end
end
generated by LDoc 1.4.3 Last updated 2017-04-06 12:45:34