• R/O
  • HTTP
  • SSH


No Tags

Frequently used words (click to add to your profile)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer


Commit MetaInfo

Revisão: 0966df4efa0db839ec8bc6b9ef9028bf8e264615 (tree)
Hora: 2011-10-06 02:14:55
Autor: Rick Copeland <rcopeland@geek...>
Committer: Rick Copeland

Mensagem de Log

[#2020] Integrating new repo refresh code with existing repo models

Signed-off-by: Rick Copeland <rcopeland@geek.net>

Resumo das mudanças


--- a/Allura/allura/model/index.py
+++ b/Allura/allura/model/index.py
@@ -2,43 +2,47 @@ import re
22 import logging
33 from itertools import groupby
44 from cPickle import dumps, loads
5-from datetime import datetime
65 from collections import defaultdict
87 import bson
98 import pymongo
10-from pylons import c, g
9+from pylons import c
12-import ming
11+from ming import collection, Field, Index
1312 from ming import schema as S
1413 from ming.utils import LazyProperty
15-from ming.orm import session
16-from ming.orm import FieldProperty, ForeignIdProperty, RelationProperty
17-from ming.orm.declarative import MappedClass
14+from ming.orm import session, mapper
15+from ming.orm import ForeignIdProperty, RelationProperty
1917 from allura.lib import helpers as h
21-from .session import main_orm_session
19+from .session import main_doc_session, main_orm_session
2321 log = logging.getLogger(__name__)
25-class ArtifactReference(MappedClass):
26- '''ArtifactReference manages the artifact graph.
28- fields are all strs, corresponding to Solr index_ids
29- '''
30- class __mongometa__:
31- session = main_orm_session
32- name = 'artifact_reference'
33- indexes = [ 'references' ]
35- _id = FieldProperty(str)
36- artifact_reference = FieldProperty(S.Object(dict(
37- cls=S.Binary,
38- project_id=S.ObjectId,
39- app_config_id=S.ObjectId,
40- artifact_id=S.Anything(if_missing=None))))
41- references = FieldProperty([str])
23+# Collection definitions
24+ArtifactReferenceDoc = collection(
25+ 'artifact_reference', main_doc_session,
26+ Field('_id', str),
27+ Field('artifact_reference', dict(
28+ cls=S.Binary(),
29+ project_id=S.ObjectId(),
30+ app_config_id=S.ObjectId(),
31+ artifact_id=S.Anything(if_missing=None))),
32+ Field('references', [str], index=True))
34+ShortlinkDoc = collection(
35+ 'shortlink', main_doc_session,
36+ Field('_id', S.ObjectId()),
37+ Field('ref_id', str, index=True),
38+ Field('project_id', S.ObjectId()),
39+ Field('app_config_id', S.ObjectId()),
40+ Field('link', str),
41+ Field('url', str),
42+ Index('link, project_id', 'app_config_id'))
44+# Class definitions
45+class ArtifactReference(object):
4347 @classmethod
4448 def from_artifact(cls, artifact):
@@ -71,28 +75,8 @@ class ArtifactReference(MappedClass):
7175 log.exception('Error loading artifact for %s: %r',
7276 self._id, aref)
74-class Shortlink(MappedClass):
78+class Shortlink(object):
7579 '''Collection mapping shorthand_ids for artifacts to ArtifactReferences'''
76- class __mongometa__:
77- session = main_orm_session
78- name = 'shortlink'
79- indexes = [
80- ('link', 'project_id', 'app_config_id'),
81- ('ref_id',),
82- ]
84- # Stored properties
85- _id = FieldProperty(S.ObjectId)
86- ref_id = ForeignIdProperty(ArtifactReference)
87- project_id = ForeignIdProperty('Project')
88- app_config_id = ForeignIdProperty('AppConfig')
89- link = FieldProperty(str)
90- url = FieldProperty(str)
92- # Relation Properties
93- project = RelationProperty('Project')
94- app_config = RelationProperty('AppConfig')
95- ref = RelationProperty('ArtifactReference')
9781 # Regexes used to find shortlinks
9882 _core_re = r'''(\[
@@ -202,3 +186,12 @@ class Shortlink(MappedClass):
202186 else:
203187 return None
189+# Mapper definitions
190+mapper(ArtifactReference, ArtifactReferenceDoc, main_orm_session)
191+mapper(Shortlink, ShortlinkDoc, main_orm_session, properties=dict(
192+ ref_id = ForeignIdProperty(ArtifactReference),
193+ project_id = ForeignIdProperty('Project'),
194+ app_config_id = ForeignIdProperty('AppConfig'),
195+ project = RelationProperty('Project'),
196+ app_config = RelationProperty('AppConfig'),
197+ ref = RelationProperty(ArtifactReference)))
--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -1,77 +1,59 @@
1+import re
2+import sys
3+import logging
4+from hashlib import sha1
5+from itertools import izip, chain
16 from datetime import datetime
7+from collections import defaultdict
3-from ming import Document, Field
9+from pylons import g
11+from ming import Field, Index, collection
412 from ming import schema as S
13+from ming.utils import LazyProperty
14+from ming.orm import mapper
6-from .session import main_doc_session, project_doc_session
16+from allura.lib import utils
17+from allura.lib import helpers as h
8-class Commit(Document):
9- class __mongometa__:
10- name = 'repo_ci'
11- session = main_doc_session
12- indexes = [
13- ('parent_ids',),
14- ('child_ids',),
15- ('repo_ids',)]
16- User = dict(name=str, email=str, date=datetime)
18- _id = Field(str)
19- tree_id = Field(str)
20- committed = Field(User)
21- authored = Field(User)
22- message = Field(str)
23- parent_ids = Field([str])
24- child_ids = Field([str])
25- repo_ids = Field([S.ObjectId()])
19+from .auth import User
20+from .session import main_doc_session, project_doc_session
21+from .session import repository_orm_session
27- def __repr__(self):
28- return '%s %s' % (
29- self._id[:7], self.summary)
23+log = logging.getLogger(__name__)
31- @property
32- def summary(self):
33- if self.message:
34- summary = []
35- for line in self.message.splitlines():
36- line = line.rstrip()
37- if line: summary.append(line)
38- else: return ' '.join(summary)
39- return ' '.join(summary)
40- return ''
25+SUser = dict(name=str, email=str, date=datetime)
26+SObjType=S.OneOf('blob', 'tree', 'submodule')
27+QSIZE = 100
28+README_RE = re.compile('^README(\.[^.]*)?$', re.IGNORECASE)
42- def url(self):
43- return ''
30+# Basic commit information
31+CommitDoc = collection(
32+ 'repo_ci', main_doc_session,
33+ Field('_id', str),
34+ Field('tree_id', str),
35+ Field('committed', SUser),
36+ Field('authored', SUser),
37+ Field('message', str),
38+ Field('parent_ids', [str], index=True),
39+ Field('child_ids', [str], index=True),
40+ Field('repo_ids', [ S.ObjectId() ], index=True))
45- def shorthand_id(self):
46- return ''
42+# Basic tree information
43+TreeDoc = collection(
44+ 'repo_tree', main_doc_session,
45+ Field('_id', str),
46+ Field('tree_ids', [dict(name=str, id=str)]),
47+ Field('blob_ids', [dict(name=str, id=str)]),
48+ Field('other_ids', [dict(name=str, id=str, type=SObjType)]))
48- @property
49- def author_url(self):
50- return ''
52-class Tree(Document):
53- class __mongometa__:
54- name = 'repo_tree'
55- session = main_doc_session
56- ObjType=S.OneOf('blob', 'tree', 'submodule')
58- _id = Field(str)
59- tree_ids = Field([dict(name=str, id=str)])
60- blob_ids = Field([dict(name=str, id=str)])
61- other_ids = Field([dict(name=str, id=str, type=ObjType)])
63-class LastCommit(Document):
64- class __mongometa__:
65- name = 'repo_last_commit'
66- session = project_doc_session
67- indexes = [
68- ( 'repo_id', 'object_id'),
69- ]
71- _id = Field(str)
72- repo_id=Field(S.ObjectId())
73- object_id=Field(str)
74- commit_info = Field(dict(
50+# Information about the last commit to touch a tree/blob
51+LastCommitDoc = collection(
52+ 'repo_last_commit', project_doc_session,
53+ Field('_id', str),
54+ Field('repo_id', S.ObjectId()),
55+ Field('object_id', str),
56+ Field('commit_info', dict(
7557 id=str,
7658 date=datetime,
7759 author=str,
@@ -79,58 +61,298 @@ class LastCommit(Document):
7961 author_url=str,
8062 href=str,
8163 shortlink=str,
82- summary=str))
84- @classmethod
85- def set_last_commit(cls, repo_id, oid, commit):
86- lc = cls(dict(
87- _id='%s:%s' % (repo_id, oid),
88- repo_id=repo_id,
89- object_id=oid,
90- commit_info=dict(
91- id=commit._id,
92- author=commit.authored.name,
93- author_email=commit.authored.email,
94- author_url=commit.author_url,
95- date=commit.authored.date,
96- href=commit.url(),
97- shortlink=commit.shorthand_id(),
98- summary=commit.summary)))
99- lc.m.save(safe=False)
100- return lc
102-class Trees(Document):
103- class __mongometa__:
104- name = 'repo_trees'
105- session = main_doc_session
107- _id = Field(str) # commit ID
108- tree_ids = Field([str]) # tree IDs
110-class DiffInfo(Document):
111- class __mongometa__:
112- name = 'repo_diffinfo'
113- session = main_doc_session
115- _id = Field(str)
116- differences = Field([dict(name=str, lhs_id=str, rhs_id=str)])
118-class BasicBlock(Document):
119- class __mongometa__:
120- name = 'repo_basic_block'
121- session = main_doc_session
122- indexes = [
123- ('commit_ids',) ]
125- _id = Field(str)
126- parent_commit_ids = Field([str])
127- commit_ids = Field([str])
128- commit_times = Field([datetime])
64+ summary=str)),
65+ Index('repo_id', 'object_id'))
67+# List of all trees contained within a commit
68+TreesDoc = collection(
69+ 'repo_trees', main_doc_session,
70+ Field('_id', str),
71+ Field('tree_ids', [str]))
73+# Information about which things were added/removed in commit
74+DiffInfoDoc = collection(
75+ 'repo_diffinfo', main_doc_session,
76+ Field('_id', str),
77+ Field(
78+ 'differences',
79+ [ dict(name=str, lhs_id=str, rhs_id=str)]))
81+# List of commit runs (a run is a linear series of single-parent commits)
82+CommitRunDoc = collection(
83+ 'repo_commitrun', main_doc_session,
84+ Field('_id', str),
85+ Field('parent_commit_ids', [str]),
86+ Field('commit_ids', [str], index=True),
87+ Field('commit_times', [datetime]))
89+class RepoObject(object):
13091 def __repr__(self):
131- return '%s: (P %s, T %s..%s (%d commits))' % (
132- self._id[:6],
133- [ oid[:6] for oid in self.parent_commit_ids ],
134- self.commit_ids[0][:6],
135- self.commit_ids[-1][:6],
136- len(self.commit_ids))
92+ return '<%s %s>' % (
93+ self.__class__.__name__, self._id)
95+ def primary(self):
96+ return self
98+ def index_id(self):
99+ '''Globally unique artifact identifier. Used for
100+ SOLR ID, shortlinks, and maybe elsewhere
101+ '''
102+ id = '%s.%s#%s' % (
103+ self.__class__.__module__,
104+ self.__class__.__name__,
105+ self._id)
106+ return id.replace('.', '/')
108+class Commit(RepoObject):
109+ # Ephemeral attrs
110+ repo=None
112+ @LazyProperty
113+ def author_url(self):
114+ u = User.by_email_address(self.authored.email)
115+ if u: return u.url()
117+ @LazyProperty
118+ def committer_url(self):
119+ u = User.by_email_address(self.committed.email)
120+ if u: return u.url()
122+ @LazyProperty
123+ def tree(self):
124+ if self.tree_id is None:
125+ self.tree_id = self.repo.compute_tree(self)
126+ if self.tree_id is None:
127+ return None
128+ t = Tree.query.get(object_id=self.tree_id)
129+ if t is None:
130+ self.tree_id = self.repo.compute_tree(self)
131+ t = Tree.query.get(object_id=self.tree_id)
132+ if t is not None: t.set_context(self)
133+ return t
135+ @LazyProperty
136+ def summary(self):
137+ message = h.really_unicode(self.message)
138+ first_line = message.split('\n')[0]
139+ return h.text.truncate(first_line, 50)
141+ def get_path(self, path):
142+ '''Return the blob on the given path'''
143+ if path.startswith('/'): path = path[1:]
144+ path_parts = path.split('/')
145+ return self.tree.get_blob(path_parts[-1], path_parts[:-1])
147+ def shorthand_id(self):
148+ return self.repo.shorthand_for_commit(self)
150+ @LazyProperty
151+ def symbolic_ids(self):
152+ return self.repo.symbolics_for_commit(self)
154+ def url(self):
155+ return self.repo.url_for_commit(self)
157+ def log_iter(self, skip, count):
158+ for oids in utils.chunked_iter(commitlog(self._id), QSIZE):
159+ oids = list(oids)
160+ commits = dict(
161+ (ci._id, ci) for ci in self.query.find(dict(
162+ _id={'$in': oids})))
163+ for oid in oids:
164+ ci = commits[oid]
165+ ci.set_context(self.repo)
166+ yield ci
168+ def log(self, skip, count):
169+ return list(self.log_iter(skip, count))
171+ def count_revisions(self):
172+ result = 0
173+ for oid in commitlog(self): result += 1
174+ return result
176+ def context(self):
177+ result = dict(prev=None, next=None)
178+ if self.parent_ids:
179+ result['prev'] = self.query.get(_id=self.parent_ids[0])
180+ if self.child_ids:
181+ result['next'] = self.query.get(_id=self.child_ids[0])
182+ return result
184+class Tree(RepoObject):
185+ # Ephemeral attrs
186+ repo=None
187+ commit=None
188+ parent=None
189+ name=None
191+ def compute_hash(self):
192+ '''Compute a hash based on the contents of the tree. Note that this
193+ hash does not necessarily correspond to any actual DVCS hash.
194+ '''
195+ lines = (
196+ [ 'tree' + x.name + x.id for x in self.tree_ids ]
197+ + [ 'blob' + x.name + x.id for x in self.blob_ids ]
198+ + [ x.type + x.name + x.id for x in self.other_ids ])
199+ sha_obj = sha1()
200+ for line in sorted(lines):
201+ sha_obj.update(line)
202+ return sha_obj.hexdigest()
204+ def set_context(self, commit_or_tree, name=None):
205+ assert commit_or_tree is not self
206+ self.repo = commit_or_tree.repo
207+ if name:
208+ self.commit = commit_or_tree.commit
209+ self.parent = commit_or_tree
210+ self.name = name
211+ else:
212+ self.commit = commit_or_tree
214+ def readme(self):
215+ name = None
216+ text = ''
217+ for x in self.blob_ids:
218+ if README_RE.match(x.name):
219+ name = x.name
220+ text = h.really_unicode(self.repo.blob_text(x.id))
221+ break
222+ if text == '':
223+ text = '<p><em>Empty File</em></p>'
224+ else:
225+ renderer = g.pypeline_markup.renderer(name)
226+ if renderer[1]:
227+ text = g.pypeline_markup.render(name,text)
228+ else:
229+ text = '<pre>%s</pre>' % text
230+ return (name, text)
232+ def ls(self):
233+ # Load last commit info
234+ oids = [ x.id for x in chain(self.tree_ids, self.object_ids, self.other_ids) ]
235+ lc_index = dict(
236+ (lc.object_id, lc.commit)
237+ for lc in LastCommitDoc.m.find(dict(
238+ repo_id=self.repo._id,
239+ object_id={'$in': oids})))
240+ results = []
241+ def _get_last_commit(oid):
242+ lc = lc_index.get(oid)
243+ if lc is None:
244+ lc = dict(
245+ author=None,
246+ author_email=None,
247+ author_url=None,
248+ date=None,
249+ id=None,
250+ href=None,
251+ shortlink=None,
252+ summary=None)
253+ return lc
254+ for x in sorted(self.tree_ids, key=lambda x:x.name):
255+ results.append(dict(
256+ kind='DIR',
257+ name=x.name,
258+ href=x.name + '/',
259+ last_commit=_get_last_commit(x.id)))
260+ for x in sorted(self.blob_ids, key=lambda x:x.name):
261+ results.append(dict(
262+ kind='FILE',
263+ name=x.name,
264+ href=x.name + '/',
265+ last_commit=_get_last_commit(x.id)))
266+ for x in sorted(self.other_ids, key=lambda x:x.name):
267+ results.append(dict(
268+ kind=x.type,
269+ name=x.name,
270+ href=None,
271+ last_commit=_get_last_commit(x.id)))
272+ return results
274+ def path(self):
275+ if self.parent:
276+ assert self.parent is not self
277+ return self.parent.path() + self.name + '/'
278+ else:
279+ return '/'
281+ def url(self):
282+ return self.commit.url() + 'tree' + self.path()
284+ @LazyProperty
285+ def by_name(self):
286+ d = dict((x.name, x) for x in self.other_ids)
287+ d.update(
288+ (x.name, dict(x, type='tree'))
289+ for x in self.tree_ids)
290+ d.update(
291+ (x.name, dict(x, type='blob'))
292+ for x in self.blob_ids)
293+ return d
295+ def is_blob(self, name):
296+ return self.by_name[name].type == 'blob'
298+mapper(Commit, CommitDoc, repository_orm_session)
299+mapper(Tree, TreeDoc, repository_orm_session)
301+def commitlog(commit_id, skip=0, limit=sys.maxint):
303+ seen = set()
304+ def _visit(commit_id):
305+ if commit_id in seen: return
306+ run = CommitRunDoc.m.get(commit_ids=commit_id)
307+ if run is None: return
308+ index = False
309+ for pos, (oid, time) in enumerate(izip(run.commit_ids, run.commit_times)):
310+ if oid == commit_id: index = True
311+ elif not index: continue
312+ seen.add(oid)
313+ ci_times[oid] = time
314+ if pos+1 < len(run.commit_ids):
315+ ci_parents[oid] = [ run.commit_ids[pos+1] ]
316+ else:
317+ ci_parents[oid] = run.parent_commit_ids
318+ for oid in run.parent_commit_ids:
319+ _visit(oid)
321+ def _gen_ids(commit_id, skip, limit):
322+ # Traverse the graph in topo order, yielding commit IDs
323+ commits = set([commit_id])
324+ new_parent = None
325+ while commits and limit:
326+ # next commit is latest commit that's valid to log
327+ if new_parent in commits:
328+ ci = new_parent
329+ else:
330+ ci = max(commits, key=lambda ci:ci_times[ci])
331+ commits.remove(ci)
332+ if skip:
333+ skip -= 1
334+ continue
335+ else:
336+ limit -= 1
337+ yield ci
338+ # remove this commit from its parents children and add any childless
339+ # parents to the 'ready set'
340+ new_parent = None
341+ for oid in ci_parents[ci]:
342+ children = ci_children[oid]
343+ children.discard(ci)
344+ if not children:
345+ commits.add(oid)
346+ new_parent = oid
348+ # Load all the runs to build a commit graph
349+ ci_times = {}
350+ ci_parents = {}
351+ ci_children = defaultdict(set)
352+ log.info('Build commit graph')
353+ _visit(commit_id)
354+ for oid, parents in ci_parents.iteritems():
355+ for ci_parent in parents:
356+ ci_children[ci_parent].add(oid)
358+ return _gen_ids(commit_id, skip, limit)
--- a/Allura/allura/model/repository.py
+++ b/Allura/allura/model/repository.py
@@ -49,6 +49,9 @@ class RepositoryImplementation(object):
4949 def commit(self, revision): # pragma no cover
5050 raise NotImplementedError, 'commit'
52+ def all_commit_ids(self): # pragma no cover
53+ raise NotImplementedError, 'all_commit_ids'
5255 def new_commits(self, all_commits=False): # pragma no cover
5356 '''Return a list of native commits in topological order (heads first).
@@ -74,6 +77,10 @@ class RepositoryImplementation(object):
7477 '''Refresh the data in the commit object 'ci' with data from the repo'''
7578 raise NotImplementedError, 'refresh_commit'
80+ def refresh_commit_info(self, oid): # pragma no cover
81+ '''Refresh the data in the commit with id oid'''
82+ raise NotImplementedError, 'refresh_commit_info'
7784 def refresh_tree(self, tree): # pragma no cover
7885 '''Refresh the data in the tree object 'tree' with data from the repo'''
7986 raise NotImplementedError, 'refresh_tree'
@@ -105,8 +112,8 @@ class RepositoryImplementation(object):
105112 '''Return a file-like object that contains the contents of the blob'''
106113 raise NotImplementedError, 'open_blob'
108- def shorthand_for_commit(self, commit):
109- return '[%s]' % commit.object_id[:6]
115+ def shorthand_for_commit(self, oid):
116+ return '[%s]' % oid
111118 def symbolics_for_commit(self, commit):
112119 '''Return symbolic branch and tag names for a commit.
@@ -188,6 +195,10 @@ class Repository(Artifact):
188195 return self._impl.init()
189196 def commit(self, rev):
190197 return self._impl.commit(rev)
198+ def all_commit_ids(self):
199+ return self._impl.all_commit_ids()
200+ def refresh_commit_info(self, oid, seen):
201+ return self._impl.refresh_commit_info(oid, seen)
191202 def commit_context(self, commit):
192203 return self._impl.commit_context(commit)
193204 def open_blob(self, blob):
--- a/Allura/test-light.py
+++ b/Allura/test-light.py
@@ -3,60 +3,49 @@ import logging
33 from collections import defaultdict
44 from itertools import chain, izip
55 from datetime import datetime
6+from cPickle import dumps
8+import bson
79 from pylons import c
810 from pymongo.errors import DuplicateKeyError
1012 from ming.base import Object
12-from allura import model as M
1314 from allura.lib import helpers as h
1415 from allura.lib import utils
16+from allura.model.repo import CommitDoc, TreeDoc, TreesDoc, DiffInfoDoc
17+from allura.model.repo import LastCommitDoc, CommitRunDoc
18+from allura.model.repo import Commit
19+from allura.model.index import ArtifactReferenceDoc, ShortlinkDoc
1621 log = logging.getLogger(__name__)
1823 QSIZE=100
20-def dolog():
21- h.set_context('test', 'code')
22- repo = c.app.repo._impl._git
23- oid = repo.commit(repo.heads[0]).hexsha
24- log.info('start')
25- for i, ci in enumerate(commitlog(oid)):
26- print repr(ci)
27- log.info('done')
2925 def main():
3026 if len(sys.argv) > 1:
3127 h.set_context('test')
3228 c.project.install_app('Git', 'code', 'Code', init_from_url='/home/rick446/src/forge')
3329 h.set_context('test', 'code')
34- M.repo.Commit.m.remove({})
35- M.repo.Tree.m.remove({})
36- M.repo.Trees.m.remove({})
37- M.repo.DiffInfo.m.remove({})
38- M.repo.LastCommit.m.remove({})
39- M.repo.BasicBlock.m.remove({})
40- repo = c.app.repo._impl._git
42- # Get all commits
43- seen = set()
44- all_commit_ids = []
45- for head in repo.heads:
46- for ci in repo.iter_commits(head, topo_order=True):
47- if ci.binsha in seen: continue
48- seen.add(ci.binsha)
49- all_commit_ids.append(ci.hexsha)
51- # Skip commits that are already in the DB
30+ CommitDoc.m.remove({})
31+ TreeDoc.m.remove({})
32+ TreesDoc.m.remove({})
33+ DiffInfoDoc.m.remove({})
34+ LastCommitDoc.m.remove({})
35+ CommitRunDoc.m.remove({})
37+ # Get all commits (repo-specific)
38+ all_commit_ids = list(c.app.repo.all_commit_ids())
40+ # Skip commits that are already in the DB (repo-agnostic)
5241 commit_ids = unknown_commit_ids(all_commit_ids)
5342 # commit_ids = commit_ids[:500]
5443 log.info('Refreshing %d commits', len(commit_ids))
56- # Refresh commits
45+ # Refresh commits (repo-specific)
46+ seen = set()
5747 for i, oid in enumerate(commit_ids):
58- ci = repo.rev_parse(oid)
59- refresh_commit_info(ci, seen)
48+ c.app.repo.refresh_commit_info(oid, seen)
6049 if (i+1) % 100 == 0:
6150 log.info('Refresh commit info %d: %s', (i+1), oid)
@@ -64,14 +53,14 @@ def main():
6453 # Everything below here is repo-agnostic
6554 #############################################
67- refresh_repo(commit_ids, c.app.repo._id)
56+ refresh_repo(commit_ids, c.app.repo)
6958 # Refresh child references
7059 seen = set()
7160 parents = set()
7362 for i, oid in enumerate(commit_ids):
74- ci = M.repo.Commit.m.find(dict(_id=oid), validate=False).next()
63+ ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
7564 refresh_children(ci)
7665 seen.add(ci._id)
7766 parents.update(ci.parent_ids)
@@ -79,28 +68,22 @@ def main():
7968 log.info('Refresh child (a) info %d: %s', (i+1), ci._id)
8069 for j, oid in enumerate(parents-seen):
8170 try:
82- ci = M.repo.Commit.m.find(dict(_id=oid), validate=False).next()
71+ ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
8372 except StopIteration:
8473 continue
8574 refresh_children(ci)
8675 if (i + j + 1) % 100 == 0:
8776 log.info('Refresh child (b) info %d: %s', (i + j + 1), ci._id)
89- # Refresh basic blocks
90- bbb = BasicBlockBuilder(commit_ids)
91- bbb.run()
92- bbb.cleanup()
94- # Verify the log
95- log.info('Logging via basic blocks')
96- for i, ci in enumerate(commitlog(commit_ids[0])):
97- pass
98- log.info('... done (%d commits from %s)', i+1, commit_ids[0])
78+ # Refresh commit runs
79+ rb = CommitRunBuilder(commit_ids)
80+ rb.run()
81+ rb.cleanup()
10083 # Refresh trees
10184 cache = {}
10285 for i, oid in enumerate(commit_ids):
103- ci = M.repo.Commit.m.find(dict(_id=oid), validate=False).next()
86+ ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
10487 cache = refresh_commit_trees(ci, cache)
10588 if (i+1) % 100 == 0:
10689 log.info('Refresh commit trees %d: %s', (i+1), ci._id)
@@ -108,13 +91,13 @@ def main():
10891 # Compute diffs
10992 cache = {}
11093 for i, oid in enumerate(commit_ids):
111- ci = M.repo.Commit.m.find(dict(_id=oid), validate=False).next()
94+ ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
11295 compute_diffs(c.app.repo._id, cache, ci)
11396 if (i+1) % 100 == 0:
11497 log.info('Compute diffs %d: %s', (i+1), ci._id)
11699 def refresh_commit_trees(ci, cache):
117- trees_doc = M.repo.Trees(dict(
100+ trees_doc = TreesDoc(dict(
118101 _id=ci._id,
119102 tree_ids = list(trees(ci.tree_id, cache))))
120103 trees_doc.m.save(safe=False)
@@ -124,10 +107,10 @@ def refresh_commit_trees(ci, cache):
124107 return new_cache
126109 def refresh_commit_info(ci, seen):
127- if M.repo.Commit.m.find(dict(_id=ci.hexsha)).count() != 0:
110+ if CommitDoc.m.find(dict(_id=ci.hexsha)).count() != 0:
128111 return False
129112 try:
130- ci_doc = M.repo.Commit(dict(
113+ ci_doc = CommitDoc(dict(
131114 _id=ci.hexsha,
132115 tree_id=ci.tree.hexsha,
133116 committed = Object(
@@ -149,109 +132,128 @@ def refresh_commit_info(ci, seen):
149132 refresh_tree(ci.tree, seen)
150133 return True
152-def refresh_repo(commit_ids, repo_id):
135+def refresh_repo(commit_ids, repo):
153136 for oids in utils.chunked_iter(commit_ids, QSIZE):
154137 oids = list(oids)
155- M.repo.Commit.m.update_partial(
138+ # Create shortlinks and artifactrefs
139+ for oid in oids:
140+ index_id = 'allura.model.repo.Commit#' + oid
141+ ref = ArtifactReferenceDoc(dict(
142+ _id=index_id,
143+ artifact_reference=dict(
144+ cls=dumps(Commit),
145+ project_id=repo.app.config.project_id,
146+ app_config_id=repo.app.config._id,
147+ artifact_id=oid),
148+ references=[]))
149+ link = ShortlinkDoc(dict(
150+ _id=bson.ObjectId(),
151+ ref_id=index_id,
152+ project_id=repo.app.config.project_id,
153+ app_config_id=repo.app.config._id,
154+ link=repo.shorthand_for_commit(oid),
155+ url=repo.url() + 'ci/' + oid + '/'))
156+ ref.m.save(safe=False, validate=False)
157+ link.m.save(safe=False, validate=False)
158+ CommitDoc.m.update_partial(
156159 dict(
157160 _id={'$in': oids},
158- repo_ids={'$ne': repo_id}),
159- {'$addToSet': dict(repo_ids=repo_id)},
161+ repo_ids={'$ne': repo._id}),
162+ {'$addToSet': dict(repo_ids=repo._id)},
160163 multi=True)
162165 def refresh_children(ci):
163- M.repo.Commit.m.update_partial(
166+ CommitDoc.m.update_partial(
164167 dict(_id={'$in': ci.parent_ids}),
165168 {'$addToSet': dict(child_ids=ci._id)},
166169 multi=True)
168-class BasicBlockBuilder(object):
171+class CommitRunBuilder(object):
170173 def __init__(self, commit_ids):
171174 self.commit_ids = commit_ids
172- self.block_index = {} # by commit ID
173- self.blocks = {} # by block ID
174- self.reasons = {} # reasons to stop merging blocks
175+ self.run_index = {} # by commit ID
176+ self.runs = {} # by run ID
177+ self.reasons = {} # reasons to stop merging runs
176179 def run(self):
177180 for oids in utils.chunked_iter(self.commit_ids, QSIZE):
178181 oids = list(oids)
179- commits = list(M.repo.Commit.m.find(dict(_id={'$in':oids})))
182+ commits = list(CommitDoc.m.find(dict(_id={'$in':oids})))
180183 for ci in commits:
181- if ci._id in self.block_index: continue
182- self.block_index[ci._id] = ci._id
183- self.blocks[ci._id] = M.repo.BasicBlock(dict(
184+ if ci._id in self.run_index: continue
185+ self.run_index[ci._id] = ci._id
186+ self.runs[ci._id] = CommitRunDoc(dict(
184187 _id=ci._id,
185188 parent_commit_ids=ci.parent_ids,
186189 commit_ids=[ci._id],
187190 commit_times=[ci.authored.date]))
188- self.merge_blocks()
189- log.info('%d basic blocks', len(self.blocks))
190- for bid, bb in sorted(self.blocks.items()):
191- log.info('%32s: %r', self.reasons.get(bid, 'none'), bb)
192- for bb in self.blocks.itervalues():
193- bb.m.save()
194- return self.blocks
196- def _all_blocks(self):
197- blocks = {}
191+ self.merge_runs()
192+ log.info('%d runs', len(self.runs))
193+ for rid, run in sorted(self.runs.items()):
194+ log.info('%32s: %r', self.reasons.get(rid, 'none'), run._id)
195+ for run in self.runs.itervalues():
196+ run.m.save()
197+ return self.runs
199+ def _all_runs(self):
200+ runs = {}
198201 for oids in utils.chunked_iter(self.commit_ids, QSIZE):
199202 oids = list(oids)
200- for bb in M.repo.BasicBlock.m.find(dict(commit_ids={'$in': oids})):
201- blocks[bb._id] = bb
202- seen_bids = set()
203- blocks = blocks.values()
204- while blocks:
205- bb = blocks.pop()
206- if bb._id in seen_bids: continue
207- seen_bids.add(bb._id)
208- yield bb
209- for bb in M.repo.BasicBlock.m.find(
210- dict(commit_ids={'$in':bb.parent_commit_ids})):
211- blocks.append(bb)
203+ for run in CommitRunDoc.m.find(dict(commit_ids={'$in': oids})):
204+ runs[run._id] = run
205+ seen_run_ids = set()
206+ runs = runs.values()
207+ while runs:
208+ run = runs.pop()
209+ if run._id in seen_run_ids: continue
210+ seen_run_ids.add(run._id)
211+ yield run
212+ for run in CommitRunDoc.m.find(
213+ dict(commit_ids={'$in':run.parent_commit_ids})):
214+ runs.append(run)
213216 def cleanup(self):
214- '''Delete non-maximal basic blocks'''
215- for bb1 in self._all_blocks():
216- for bb2 in M.repo.BasicBlock.m.find(dict(
217- commit_ids=bb1.commit_ids[0])):
218- if bb2._id == bb1._id: continue
219- log.info('... delete %r (part of %r)', bb2, bb1)
220- import pdb; pdb.set_trace()
221- bb2.m.delete()
223- def merge_blocks(self):
217+ '''Delete non-maximal runs'''
218+ for run1 in self._all_runs():
219+ for run2 in CommitRunDoc.m.find(dict(
220+ commit_ids=run1.commit_ids[0])):
221+ if run1._id == run2._id: continue
222+ log.info('... delete %r (part of %r)', run2, run1)
223+ run2.m.delete()
225+ def merge_runs(self):
224226 while True:
225- for bid, bb in self.blocks.iteritems():
226- if len(bb.parent_commit_ids) != 1:
227- self.reasons[bid] = '%d parents' % len(bb.parent_commit_ids)
227+ for run_id, run in self.runs.iteritems():
228+ if len(run.parent_commit_ids) != 1:
229+ self.reasons[run_id] = '%d parents' % len(run.parent_commit_ids)
228230 continue
229- p_oid = bb.parent_commit_ids[0]
230- p_bid = self.block_index.get(p_oid)
231- if p_bid is None:
232- self.reasons[bid] = 'parent commit not found'
231+ p_oid = run.parent_commit_ids[0]
232+ p_run_id = self.run_index.get(p_oid)
233+ if p_run_id is None:
234+ self.reasons[run_id] = 'parent commit not found'
233235 continue
234- p_bb = self.blocks.get(p_bid)
235- if p_bb is None:
236- self.reasons[bid] = 'parent block not found'
236+ p_run = self.runs.get(p_run_id)
237+ if p_run is None:
238+ self.reasons[run_id] = 'parent run not found'
237239 continue
238- if p_bb.commit_ids[0] != p_oid:
239- self.reasons[bid] = 'parent does not start with parent commit'
240+ if p_run.commit_ids[0] != p_oid:
241+ self.reasons[run_id] = 'parent does not start with parent commit'
240242 continue
241- bb.commit_ids += p_bb.commit_ids
242- bb.commit_times += p_bb.commit_times
243- bb.parent_commit_ids = p_bb.parent_commit_ids
244- for oid in p_bb.commit_ids:
245- self.block_index[oid] = bid
243+ run.commit_ids += p_run.commit_ids
244+ run.commit_times += p_run.commit_times
245+ run.parent_commit_ids = p_run.parent_commit_ids
246+ for oid in p_run.commit_ids:
247+ self.run_index[oid] = run_id
246248 break
247249 else:
248250 break
249- del self.blocks[p_bid]
251+ del self.runs[p_run_id]
251253 def refresh_tree(t, seen):
252254 if t.binsha in seen: return
253255 seen.add(t.binsha)
254- doc = M.repo.Tree(dict(
256+ doc = TreeDoc(dict(
255257 _id=t.hexsha,
256258 tree_ids=[],
257259 blob_ids=[],
@@ -274,7 +276,7 @@ def trees(id, cache):
274276 yield id
275277 entries = cache.get(id, None)
276278 if entries is None:
277- t = M.repo.Tree.m.get(_id=id)
279+ t = TreeDoc.m.get(_id=id)
278280 entries = [ o.id for o in t.tree_ids ]
279281 cache[id] = entries
280282 for i in entries:
@@ -284,7 +286,7 @@ def trees(id, cache):
284286 def unknown_commit_ids(all_commit_ids):
285287 result = []
286288 for chunk in utils.chunked_iter(all_commit_ids, QSIZE):
287- q = M.repo.Commit.m.find(_id={'$in':chunk})
289+ q = CommitDoc.m.find(_id={'$in':chunk})
288290 known_commit_ids = set(ci._id for ci in q)
289291 result += [ oid for oid in chunk if oid not in known_commit_ids ]
290292 return result
@@ -298,20 +300,20 @@ def compute_diffs(repo_id, tree_cache, rhs_ci):
298300 for xx in _walk_tree(tree_index[x.id], tree_index):
299301 yield xx
301- rhs_tree_ids = M.repo.Trees.m.get(_id=rhs_ci._id).tree_ids
303+ rhs_tree_ids = TreesDoc.m.get(_id=rhs_ci._id).tree_ids
302304 if rhs_ci.parent_ids:
303- lhs_ci = M.repo.Commit.m.get(_id=rhs_ci.parent_ids[0])
305+ lhs_ci = CommitDoc.m.get(_id=rhs_ci.parent_ids[0])
304306 else:
305307 lhs_ci = None
306308 if lhs_ci is not None:
307- lhs_tree_ids = M.repo.Trees.m.get(_id=lhs_ci._id).tree_ids
309+ lhs_tree_ids = TreesDoc.m.get(_id=lhs_ci._id).tree_ids
308310 else:
309311 lhs_tree_ids = []
310312 new_tree_ids = [
311313 tid for tid in chain(lhs_tree_ids, rhs_tree_ids)
312314 if tid not in tree_cache ]
313315 tree_index = dict(
314- (t._id, t) for t in M.repo.Tree.m.find(dict(_id={'$in': new_tree_ids}),validate=False))
316+ (t._id, t) for t in TreeDoc.m.find(dict(_id={'$in': new_tree_ids}),validate=False))
315317 tree_index.update(tree_cache)
316318 rhs_tree_ids_set = set(rhs_tree_ids)
317319 tree_cache.clear()
@@ -328,83 +330,17 @@ def compute_diffs(repo_id, tree_cache, rhs_ci):
328330 dict(name=name, lhs_id=lhs_id, rhs_id=rhs_id))
329331 # Set last commit info
330332 if rhs_id is not None:
331- M.repo.LastCommit.set_last_commit(repo_id, rhs_id, rhs_ci)
333+ _set_last_commit(repo_id, rhs_id, rhs_ci)
332334 rhs_tree = tree_index.get(rhs_id, None)
333335 if rhs_tree is not None:
334336 for oid in _walk_tree(rhs_tree, tree_index):
335- M.repo.LastCommit.set_last_commit(repo_id, oid, rhs_ci)
336- di = M.repo.DiffInfo(dict(
337+ _set_last_commit(repo_id, oid, rhs_ci)
338+ di = DiffInfoDoc(dict(
337339 _id=rhs_ci._id,
338340 differences=differences))
339341 di.m.save()
340342 return tree_cache
342-def commitlog(commit_id, skip=0, limit=sys.maxint):
344- seen = set()
345- def _visit(commit_id):
346- if commit_id in seen: return
347- bb = M.repo.BasicBlock.m.get(commit_ids=commit_id)
348- if bb is None: return
349- index = False
350- for pos, (oid, time) in enumerate(izip(bb.commit_ids, bb.commit_times)):
351- if oid == commit_id: index = True
352- elif not index: continue
353- seen.add(oid)
354- ci_times[oid] = time
355- if pos+1 < len(bb.commit_ids):
356- ci_parents[oid] = [ bb.commit_ids[pos+1] ]
357- else:
358- ci_parents[oid] = bb.parent_commit_ids
359- for oid in bb.parent_commit_ids:
360- _visit(oid)
362- def _gen_ids(commit_id, skip, limit):
363- # Traverse the graph in topo order, yielding commit IDs
364- commits = set([commit_id])
365- new_parent = None
366- while commits and limit:
367- # next commit is latest commit that's valid to log
368- if new_parent in commits:
369- ci = new_parent
370- else:
371- ci = max(commits, key=lambda ci:ci_times[ci])
372- commits.remove(ci)
373- if skip:
374- skip -= 1
375- continue
376- else:
377- limit -= 1
378- yield ci
379- # remove this commit from its parents children and add any childless
380- # parents to the 'ready set'
381- new_parent = None
382- for oid in ci_parents[ci]:
383- children = ci_children[oid]
384- children.discard(ci)
385- if not children:
386- commits.add(oid)
387- new_parent = oid
389- # Load all the blocks to build a commit graph
390- ci_times = {}
391- ci_parents = {}
392- ci_children = defaultdict(set)
393- log.info('Build commit graph')
394- _visit(commit_id)
395- for oid, parents in ci_parents.iteritems():
396- for ci_parent in parents:
397- ci_children[ci_parent].add(oid)
399- # Convert oids to commit objects
400- log.info('Traverse commit graph')
401- for oids in utils.chunked_iter(_gen_ids(commit_id, skip, limit), QSIZE):
402- oids = list(oids)
403- index = dict(
404- (ci._id, ci) for ci in M.repo.Commit.m.find(dict(_id={'$in': oids})))
405- for oid in oids:
406- yield index[oid]
408344 def _diff_trees(lhs, rhs, index, *path):
409345 def _fq(name):
410346 return '/'.join(reversed(
@@ -441,6 +377,24 @@ def _diff_trees(lhs, rhs, index, *path):
441377 for name, id in rhs_blob_ids.items():
442378 yield (_fq(name), None, id)
380+def _set_last_commit(repo_id, oid, commit):
381+ lc = LastCommitDoc(dict(
382+ _id='%s:%s' % (repo_id, oid),
383+ repo_id=repo_id,
384+ object_id=oid,
385+ commit_info=dict(
386+ id=commit._id,
387+ author=commit.authored.name,
388+ author_email=commit.authored.email,
389+ date=commit.authored.date,
390+ # author_url=commit.author_url,
391+ # href=commit.url(),
392+ # shortlink=commit.shorthand_id(),
393+ # summary=commit.summary
394+ )))
395+ lc.m.save(safe=False)
396+ return lc
444398 if __name__ == '__main__':
445399 main()
446400 # dolog()
--- a/ForgeGit/forgegit/model/git_repo.py
+++ b/ForgeGit/forgegit/model/git_repo.py
@@ -117,6 +117,14 @@ class GitImplementation(M.RepositoryImplementation):
117117 result.set_context(self._repo)
118118 return result
120+ def all_commit_ids(self):
121+ seen = set()
122+ for head in self._git.heads:
123+ for ci in self._git.iter_commits(head, topo_order=True):
124+ if ci.binsha in seen: continue
125+ seen.add(ci.binsha)
126+ yield ci.hexsha
120128 def new_commits(self, all_commits=False):
121129 commits = list(self._git.iter_commits(topo_order=True))
122130 if all_commits: return commits
@@ -184,6 +192,57 @@ class GitImplementation(M.RepositoryImplementation):
184192 self._build_manifest(native_ci)
185193 self.refresh_tree(root_entry)
195+ def refresh_commit_info(self, oid, seen):
196+ from allura.model.repo import CommitDoc
197+ if CommitDoc.m.find(dict(_id=oid)).count():
198+ return False
199+ try:
200+ ci = self._git.rev_parse(oid)
201+ ci_doc = CommitDoc(dict(
202+ _id=ci.hexsha,
203+ tree_id=ci.tree.hexsha,
204+ committed = Object(
205+ name=h.really_unicode(ci.committer.name),
206+ email=h.really_unicode(ci.committer.email),
207+ date=datetime.utcfromtimestamp(
208+ ci.committed_date-ci.committer_tz_offset)),
209+ authored = Object(
210+ name=h.really_unicode(ci.author.name),
211+ email=h.really_unicode(ci.author.email),
212+ date=datetime.utcfromtimestamp(
213+ ci.authored_date-ci.author_tz_offset)),
214+ message=h.really_unicode(ci.message or ''),
215+ child_ids=[],
216+ parent_ids = [ p.hexsha for p in ci.parents ]))
217+ ci_doc.m.insert(safe=True)
218+ except DuplicateKeyError:
219+ return False
220+ self.refresh_tree_info(ci.tree, seen)
221+ return True
223+ def refresh_tree_info(self, tree, seen):
224+ from allura.model.repo import TreeDoc
225+ if tree.binsha in seen: return
226+ seen.add(tree.binsha)
227+ doc = TreeDoc(dict(
228+ _id=tree.hexsha,
229+ tree_ids=[],
230+ blob_ids=[],
231+ other_ids=[]))
232+ for o in tree:
233+ obj = Object(
234+ name=h.really_unicode(o.name),
235+ id=o.hexsha)
236+ if o.type == 'tree':
237+ self.refresh_tree_info(o, seen)
238+ doc.tree_ids.append(obj)
239+ elif o.type == 'blob':
240+ doc.blob_ids.append(obj)
241+ else:
242+ obj.type = o.type
243+ doc.other_ids.append(obj)
244+ doc.m.save(safe=False)
187246 def _build_manifest(self, native_ci):
188247 '''Build the manifest for this commit (mapof all paths to trees/blobs)